diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2024-07-12 11:24:12 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-07-12 11:24:12 -0400 |
commit | c8b8b8190a80b591aa73c27c70a668799f8db547 (patch) | |
tree | 9d948c9aac89678abe64ac81f6c43348bf4b2091 /arch/loongarch | |
parent | f0a23883fad4ec8a63faddb9639a92be2e007624 (diff) | |
parent | 492ac37fa38faf520b5beae44c930063265ee183 (diff) | |
download | lwn-c8b8b8190a80b591aa73c27c70a668799f8db547.tar.gz lwn-c8b8b8190a80b591aa73c27c70a668799f8db547.zip |
Merge tag 'loongarch-kvm-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson into HEAD
LoongArch KVM changes for v6.11
1. Add ParaVirt steal time support.
2. Add some VM migration enhancements.
3. Add perf kvm-stat support for loongarch.
Diffstat (limited to 'arch/loongarch')
29 files changed, 551 insertions, 110 deletions
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e38139c576ee..b81d0eba5c7e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -143,7 +143,7 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -261,6 +261,9 @@ config AS_HAS_EXPLICIT_RELOCS config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) +config AS_HAS_THIN_ADD_SUB + def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) + config AS_HAS_LSX_EXTENSION def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) @@ -646,6 +649,17 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. 
+ endmenu config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 98d60630c3d4..8b2ce5b5d43e 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -28,6 +28,7 @@ config UNWINDER_PROLOGUE config UNWINDER_ORC bool "ORC unwinder" + depends on HAVE_OBJTOOL select OBJTOOL help This option enables the ORC (Oops Rewind Capability) unwinder for diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts index 8aefb0c12672..a34734a6c3ce 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -44,14 +44,14 @@ &gmac0 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; bus_id = <0x0>; }; &gmac1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; bus_id = <0x1>; }; diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index 8463fe035386..23cf26cc3e5f 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -43,7 +43,7 @@ &gmac0 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&phy0>; mdio { compatible = "snps,dwmac-mdio"; @@ -58,7 +58,7 @@ &gmac1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&phy1>; mdio { compatible = "snps,dwmac-mdio"; diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts index 74b99bd234cc..ea9e6985d0e9 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -92,7 +92,7 @@ &gmac2 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&phy2>; mdio { compatible = "snps,dwmac-mdio"; diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 21447fb1efc7..d78330916bd1 100644 
--- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -75,6 +75,8 @@ do { \ #define CSR_MWPC_NUM 0x3f #define CTRL_PLV_ENABLE 0x1e +#define CTRL_PLV0_ENABLE 0x02 +#define CTRL_PLV3_ENABLE 0x10 #define MWPnCFG3_LoadEn 8 #define MWPnCFG3_StoreEn 9 @@ -101,7 +103,7 @@ struct perf_event; struct perf_event_attr; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset); + int *gen_len, int *gen_type); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index c87b6ea0ec47..fe38f98eeff8 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -30,12 +30,17 @@ #define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1) #define KVM_GUESTDBG_SW_BP_MASK \ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) #define KVM_GUESTDBG_VALID_MASK \ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) +#define KVM_DIRTY_LOG_MANUAL_CAPS \ + (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET) + struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; @@ -190,6 +195,7 @@ struct kvm_vcpu_arch { /* vcpu's vpid */ u64 vpid; + gpa_t flush_gpa; /* Frequency of stable timer in Hz */ u64 timer_mhz; @@ -201,6 +207,13 @@ struct kvm_vcpu_arch { struct kvm_mp_state mp_state; /* cpucfg */ u32 cpucfg[KVM_MAX_CPUCFG_REGS]; + + /* paravirt steal time */ + struct { + u64 guest_addr; + u64 last_steal; + struct gfn_to_hva_cache cache; + } st; }; static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 
4ba2312e5f8c..335fb86778e2 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_HCALL_SERVICE HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE) #define KVM_HCALL_FUNC_IPI 1 +#define KVM_HCALL_FUNC_NOTIFY 2 #define KVM_HCALL_SWDBG HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) @@ -24,6 +25,16 @@ #define KVM_HCALL_INVALID_CODE -1UL #define KVM_HCALL_INVALID_PARAMETER -2UL +#define KVM_STEAL_PHYS_VALID BIT_ULL(0) +#define KVM_STEAL_PHYS_MASK GENMASK_ULL(63, 6) + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + /* * Hypercall interface for KVM hypervisor * diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 590a92cb5416..c416cb7125c0 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -120,4 +120,9 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v vcpu->arch.gprs[num] = val; } +static inline bool kvm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index eb09adda54b7..7a4633ef284b 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -169,6 +169,7 @@ #define KVM_SIGNATURE "KVM\0" #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) #define KVM_FEATURE_IPI BIT(1) +#define KVM_FEATURE_STEAL_TIME BIT(2) #ifndef __ASSEMBLY__ diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h index 27f319b49862..b5f9de9f102e 100644 --- a/arch/loongarch/include/asm/numa.h +++ b/arch/loongarch/include/asm/numa.h @@ -56,6 +56,7 @@ extern int early_cpu_to_node(int cpu); static inline void early_numa_add_cpu(int cpuid, s16 node) { } static inline void numa_add_cpu(unsigned int cpu) { } static inline void 
numa_remove_cpu(unsigned int cpu) { } +static inline void set_cpuid_to_node(int cpuid, s16 node) { } static inline int early_cpu_to_node(int cpu) { diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index 0965710f47f2..dddec49671ae 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -18,6 +18,7 @@ static inline u64 paravirt_steal_clock(int cpu) } int __init pv_ipi_init(void); +int __init pv_time_init(void); #else @@ -26,5 +27,9 @@ static inline int pv_ipi_init(void) return 0; } +static inline int pv_time_init(void) +{ + return 0; +} #endif // CONFIG_PARAVIRT #endif diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 45b507a7b06f..d9eafd3ee3d1 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -42,7 +42,7 @@ .macro JUMP_VIRT_ADDR temp1 temp2 li.d \temp1, CACHE_BASE pcaddi \temp2, 0 - or \temp1, \temp1, \temp2 + bstrins.d \temp1, \temp2, (DMW_PABITS - 1), 0 jirl zero, \temp1, 0xc .endm diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index f9abef382317..ddc5cab0ffd0 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -81,7 +81,11 @@ struct kvm_fpu { #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) + +/* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 +#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 struct kvm_debug_exit_arch { }; diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index c4f7de2e2805..4677ea8fa8e9 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -22,7 +22,7 @@ _head: .word MZ_MAGIC /* 
"MZ", MS-DOS header */ .org 0x8 - .dword kernel_entry /* Kernel entry point */ + .dword _kernel_entry /* Kernel entry point (physical address) */ .dword _kernel_asize /* Kernel image effective size */ .quad PHYS_LINK_KADDR /* Kernel image load offset from start of RAM */ .org 0x38 /* 0x20 ~ 0x37 reserved */ diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index fc55c4de2a11..621ad7634df7 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -174,11 +174,21 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) static int hw_breakpoint_control(struct perf_event *bp, enum hw_breakpoint_ops ops) { - u32 ctrl; + u32 ctrl, privilege; int i, max_slots, enable; + struct pt_regs *regs; struct perf_event **slots; struct arch_hw_breakpoint *info = counter_arch_bp(bp); + if (arch_check_bp_in_kernelspace(info)) + privilege = CTRL_PLV0_ENABLE; + else + privilege = CTRL_PLV3_ENABLE; + + /* Whether bp belongs to a task. */ + if (bp->hw.target) + regs = task_pt_regs(bp->hw.target); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { /* Breakpoint */ slots = this_cpu_ptr(bp_on_reg); @@ -197,31 +207,38 @@ static int hw_breakpoint_control(struct perf_event *bp, switch (ops) { case HW_BREAKPOINT_INSTALL: /* Set the FWPnCFG/MWPnCFG 1~4 register. 
*/ - write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); - write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); - write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); - write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { - write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); + write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); + write_wb_reg(CSR_CFG_CTRL, i, 0, privilege); } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); + write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege); } enable = csr_read64(LOONGARCH_CSR_CRMD); csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD); + if (bp->hw.target) + regs->csr_prmd |= CSR_PRMD_PWE; break; case HW_BREAKPOINT_UNINSTALL: /* Reset the FWPnCFG/MWPnCFG 1~4 register. */ - write_wb_reg(CSR_CFG_ADDR, i, 0, 0); - write_wb_reg(CSR_CFG_ADDR, i, 1, 0); - write_wb_reg(CSR_CFG_MASK, i, 0, 0); - write_wb_reg(CSR_CFG_MASK, i, 1, 0); - write_wb_reg(CSR_CFG_CTRL, i, 0, 0); - write_wb_reg(CSR_CFG_CTRL, i, 1, 0); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { + write_wb_reg(CSR_CFG_ADDR, i, 0, 0); + write_wb_reg(CSR_CFG_MASK, i, 0, 0); + write_wb_reg(CSR_CFG_CTRL, i, 0, 0); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); + } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, 0); + write_wb_reg(CSR_CFG_MASK, i, 1, 0); + write_wb_reg(CSR_CFG_CTRL, i, 1, 0); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); + } + if (bp->hw.target) + regs->csr_prmd &= ~CSR_PRMD_PWE; break; } @@ -283,7 +300,7 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) * to generic breakpoint descriptions. 
*/ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset) + int *gen_len, int *gen_type) { /* Type */ switch (ctrl.type) { @@ -303,11 +320,6 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } - if (!ctrl.len) - return -EINVAL; - - *offset = __ffs(ctrl.len); - /* Len */ switch (ctrl.len) { case LOONGARCH_BREAKPOINT_LEN_1: @@ -386,21 +398,17 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, struct arch_hw_breakpoint *hw) { int ret; - u64 alignment_mask, offset; + u64 alignment_mask; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; - if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) - alignment_mask = 0x7; - else + if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { alignment_mask = 0x3; - offset = hw->address & alignment_mask; - - hw->address &= ~alignment_mask; - hw->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + } return 0; } @@ -471,12 +479,15 @@ void breakpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(bp_on_reg); for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) { - bp = slots[i]; - if (bp == NULL) - continue; - perf_bp_event(bp, regs); + if ((csr_read32(LOONGARCH_CSR_FWPS) & (0x1 << i))) { + bp = slots[i]; + if (bp == NULL) + continue; + perf_bp_event(bp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_FWPS); + update_bp_registers(regs, 0, 0); + } } - update_bp_registers(regs, 0, 0); } NOKPROBE_SYMBOL(breakpoint_handler); @@ -488,12 +499,15 @@ void watchpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) { - wp = slots[i]; - if (wp == NULL) - continue; - perf_bp_event(wp, regs); + if ((csr_read32(LOONGARCH_CSR_MWPS) & (0x1 << i))) { + wp = slots[i]; + if (wp == NULL) + continue; + perf_bp_event(wp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_MWPS); + update_bp_registers(regs, 0, 1); + } } - update_bp_registers(regs, 0, 1); } 
NOKPROBE_SYMBOL(watchpoint_handler); diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 1633ed4f692f..9abe8b71aa48 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -4,11 +4,14 @@ #include <linux/interrupt.h> #include <linux/jump_label.h> #include <linux/kvm_para.h> +#include <linux/reboot.h> #include <linux/static_call.h> #include <asm/paravirt.h> +static int has_steal_clock; struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); static u64 native_steal_clock(int cpu) { @@ -17,6 +20,34 @@ static u64 native_steal_clock(int cpu) DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); +static bool steal_acc = true; + +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 paravt_steal_clock(int cpu) +{ + int version; + u64 steal; + struct kvm_steal_time *src; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + virt_rmb(); /* Make sure that the version is read before the steal */ + steal = src->steal; + virt_rmb(); /* Make sure that the steal is read before the next version */ + + } while ((version & 1) || (version != src->version)); + + return steal; +} + #ifdef CONFIG_SMP static void pv_send_ipi_single(int cpu, unsigned int action) { @@ -149,3 +180,117 @@ int __init pv_ipi_init(void) return 0; } + +static int pv_enable_steal_time(void) +{ + int cpu = smp_processor_id(); + unsigned long addr; + struct kvm_steal_time *st; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be in one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + 
kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + + return 0; +} + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); +} + +#ifdef CONFIG_SMP +static int pv_time_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_enable_steal_time(); + local_irq_restore(flags); + + return 0; +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + + return 0; +} +#endif + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int r, feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + r = pv_enable_steal_time(); + if (r < 0) { + has_steal_clock = 0; + return 0; + } + register_reboot_notifier(&pv_reboot_nb); + +#ifdef CONFIG_SMP + r = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "loongarch/pv_time:online", + pv_time_cpu_online, pv_time_cpu_down_prepare); + if (r < 0) { + has_steal_clock = 0; + pr_err("Failed to install cpu hotplug callbacks\n"); + return r; + } +#endif + + static_call_update(pv_steal_clock, paravt_steal_clock); + + static_key_slow_inc(&paravirt_steal_enabled); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (steal_acc) + static_key_slow_inc(&paravirt_steal_rq_enabled); +#endif + + pr_info("Using paravirt steal-time\n"); + + return 0; +} diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index c114c5ef1332..200109de1971 100644 --- 
a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -494,28 +494,14 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, offset; + int err, len, type; - err = arch_bp_generic_fields(ctrl, &len, &type, &offset); + err = arch_bp_generic_fields(ctrl, &len, &type); if (err) return err; - switch (note_type) { - case NT_LOONGARCH_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_LOONGARCH_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: - return -EINVAL; - } - attr->bp_len = len; attr->bp_type = type; - attr->bp_addr += offset; return 0; } @@ -609,10 +595,27 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type, return PTR_ERR(bp); attr = bp->attr; - decode_ctrl_reg(uctrl, &ctrl); - err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); - if (err) - return err; + + switch (note_type) { + case NT_LOONGARCH_HW_BREAK: + ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE; + ctrl.len = LOONGARCH_BREAKPOINT_LEN_4; + break; + case NT_LOONGARCH_HW_WATCH: + decode_ctrl_reg(uctrl, &ctrl); + break; + default: + return -EINVAL; + } + + if (uctrl & CTRL_PLV_ENABLE) { + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); + if (err) + return err; + attr.disabled = 0; + } else { + attr.disabled = 1; + } return modify_user_hw_breakpoint(bp, &attr); } @@ -643,6 +646,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type, struct perf_event *bp; struct perf_event_attr attr; + /* Kernel-space address cannot be monitored by user-space */ + if ((unsigned long)addr >= XKPRANGE) + return -EINVAL; + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); if (IS_ERR(bp)) return PTR_ERR(bp); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 60e0fe97f61a..3d048f1be143 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -282,7 +282,7 @@ 
static void __init fdt_setup(void) return; /* Prefer to use built-in dtb, checking its legality first. */ - if (!fdt_check_header(__dtb_start)) + if (IS_ENABLED(CONFIG_BUILTIN_DTB) && !fdt_check_header(__dtb_start)) fdt_pointer = __dtb_start; else fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */ @@ -351,10 +351,8 @@ void __init platform_init(void) arch_reserve_vmcore(); arch_reserve_crashkernel(); -#ifdef CONFIG_ACPI_TABLE_UPGRADE - acpi_table_upgrade(); -#endif #ifdef CONFIG_ACPI + acpi_table_upgrade(); acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 0dfe2388ef41..1436d2465939 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -273,7 +273,6 @@ static void __init fdt_smp_setup(void) if (cpuid == loongson_sysconf.boot_cpu_id) { cpu = 0; - numa_add_cpu(cpu); } else { cpu = cpumask_next_zero(-1, cpu_present_mask); } @@ -283,6 +282,9 @@ static void __init fdt_smp_setup(void) set_cpu_present(cpu, true); __cpu_number_map[cpuid] = cpu; __cpu_logical_map[cpu] = cpuid; + + early_numa_add_cpu(cpu, 0); + set_cpuid_to_node(cpuid, 0); } loongson_sysconf.nr_cpus = num_processors; @@ -468,6 +470,7 @@ void smp_prepare_boot_cpu(void) set_cpu_possible(0, true); set_cpu_online(0, true); set_my_cpu_offset(per_cpu_offset(0)); + numa_add_cpu(0); rr_node = first_node(node_online_map); for_each_possible_cpu(cpu) { diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index b4c5acd7aa3b..8801611143ab 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -22,7 +22,7 @@ #define __SYSCALL(nr, call) [nr] = (call), SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, - prot, unsigned long, flags, unsigned long, fd, off_t, offset) + prot, unsigned long, flags, unsigned long, fd, unsigned long, offset) { if (offset & ~PAGE_MASK) return -EINVAL; diff --git 
a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index fd5354f9be7c..46d7d40c87e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -15,6 +15,7 @@ #include <asm/cpu-features.h> #include <asm/loongarch.h> +#include <asm/paravirt.h> #include <asm/time.h> u64 cpu_clock_freq; @@ -214,4 +215,5 @@ void __init time_init(void) constant_clockevent_init(); constant_clocksource_init(); + pv_time_init(); } diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index e8e97dbf9ca4..3c7595342730 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #define PAGE_SIZE _PAGE_SIZE #define RO_EXCEPTION_TABLE_ALIGN 4 +#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */ /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -142,10 +143,11 @@ SECTIONS #ifdef CONFIG_EFI_STUB /* header symbols */ - _kernel_asize = _end - _text; - _kernel_fsize = _edata - _text; - _kernel_vsize = _end - __initdata_begin; - _kernel_rsize = _edata - __initdata_begin; + _kernel_entry = ABSOLUTE(kernel_entry & PHYSADDR_MASK); + _kernel_asize = ABSOLUTE(_end - _text); + _kernel_fsize = ABSOLUTE(_edata - _text); + _kernel_vsize = ABSOLUTE(_end - __initdata_begin); + _kernel_rsize = ABSOLUTE(_edata - __initdata_begin); #endif .gptab.sdata : { diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index c4ef2b4d9797..248744b4d086 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select KVM_MMIO select HAVE_KVM_READONLY_MEM select KVM_XFER_TO_GUEST_WORK + select SCHED_INFO help Support hosting virtualized guest machines using hardware virtualization extensions. 
You will need diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c86e099af5ca..ea73f9dc2cc6 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -24,7 +24,7 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) { int rd, rj; - unsigned int index; + unsigned int index, ret; if (inst.reg2_format.opcode != cpucfg_op) return EMULATE_FAIL; @@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; break; case CPUCFG_KVM_FEATURE: - vcpu->arch.gprs[rd] = KVM_FEATURE_IPI; + ret = KVM_FEATURE_IPI; + if (kvm_pvtime_supported()) + ret |= KVM_FEATURE_STEAL_TIME; + vcpu->arch.gprs[rd] = ret; break; default: vcpu->arch.gprs[rd] = 0; @@ -687,6 +690,34 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static long kvm_save_notify(struct kvm_vcpu *vcpu) +{ + unsigned long id, data; + + id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1); + data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2); + switch (id) { + case KVM_FEATURE_STEAL_TIME: + if (!kvm_pvtime_supported()) + return KVM_HCALL_INVALID_CODE; + + if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return KVM_HCALL_INVALID_PARAMETER; + + vcpu->arch.st.guest_addr = data; + if (!(data & KVM_STEAL_PHYS_VALID)) + break; + + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + break; + default: + break; + }; + + return 0; +}; + /* * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. * @vcpu: Virtual CPU context. 
@@ -758,10 +789,13 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu) kvm_send_pv_ipi(vcpu); ret = KVM_HCALL_SUCCESS; break; + case KVM_HCALL_FUNC_NOTIFY: + ret = kvm_save_notify(vcpu); + break; default: ret = KVM_HCALL_INVALID_CODE; break; - }; + } kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 86a2f2d0cb27..844736b99d38 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -242,6 +242,7 @@ void kvm_check_vpid(struct kvm_vcpu *vcpu) kvm_update_vpid(vcpu, cpu); trace_kvm_vpid_change(vcpu, vcpu->arch.vpid); vcpu->cpu = cpu; + kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); } /* Restore GSTAT(0x50).vpid */ diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 98883aa23ab8..2634a9e8d82c 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -163,6 +163,7 @@ static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm, child = kvm_mmu_memory_cache_alloc(cache); _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]); + smp_wmb(); /* Make pte visible before pmd */ kvm_set_pte(entry, __pa(child)); } else if (kvm_pte_huge(*entry)) { return entry; @@ -444,6 +445,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { int needs_flush; + u32 old_flags = old ? old->flags : 0; + u32 new_flags = new ? 
new->flags : 0; + bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES; + + /* Only track memslot flags changed */ + if (change != KVM_MR_FLAGS_ONLY) + return; + + /* Discard dirty page tracking on readonly memslot */ + if ((old_flags & new_flags) & KVM_MEM_READONLY) + return; /* * If dirty page logging is enabled, write protect all pages in the slot @@ -454,9 +466,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * MOVE/DELETE: The old mappings will already have been cleaned up by * kvm_arch_flush_shadow_memslot() */ - if (change == KVM_MR_FLAGS_ONLY && - (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) { + /* + * Initially-all-set does not require write protecting any page + * because they're all assumed to be dirty. + */ + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + spin_lock(&kvm->mmu_lock); /* Write protect GPA page table entries */ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, @@ -540,6 +557,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; struct kvm_memory_slot *slot; + struct page *page; spin_lock(&kvm->mmu_lock); @@ -551,10 +569,8 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ } /* Track access to pages marked old */ - new = *ptep; - if (!kvm_pte_young(new)) - new = kvm_pte_mkyoung(new); - /* call kvm_set_pfn_accessed() after unlock */ + new = kvm_pte_mkyoung(*ptep); + /* call kvm_set_pfn_accessed() after unlock */ if (write && !kvm_pte_dirty(new)) { if (!kvm_pte_write(new)) { @@ -582,19 +598,22 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ if (changed) { kvm_set_pte(ptep, new); pfn = kvm_pte_pfn(new); + page = kvm_pfn_to_refcounted_page(pfn); + if (page) + get_page(page); } spin_unlock(&kvm->mmu_lock); - /* - * Fixme: pfn may be freed after 
mmu_lock - * kvm_try_get_pfn(pfn)/kvm_release_pfn pair to prevent this? - */ - if (kvm_pte_young(changed)) - kvm_set_pfn_accessed(pfn); + if (changed) { + if (kvm_pte_young(changed)) + kvm_set_pfn_accessed(pfn); - if (kvm_pte_dirty(changed)) { - mark_page_dirty(kvm, gfn); - kvm_set_pfn_dirty(pfn); + if (kvm_pte_dirty(changed)) { + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + if (page) + put_page(page); } return ret; out: @@ -737,6 +756,7 @@ static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t g val += PAGE_SIZE; } + smp_wmb(); /* Make pte visible before pmd */ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ kvm_set_pte(ptep, __pa(child)); @@ -858,11 +878,21 @@ retry: /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, write)) { - level = 0; - } else { + if (fault_supports_huge_mapping(memslot, hva, write)) { + /* Check page level about host mmu*/ level = host_pfn_mapping_level(kvm, gfn, memslot); if (level == 1) { + /* + * Check page level about secondary mmu + * Disable hugepage if it is normal page on + * secondary mmu already + */ + ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); + if (ptep && !kvm_pte_huge(*ptep)) + level = 0; + } + + if (level == 1) { gfn = gfn & ~(PTRS_PER_PTE - 1); pfn = pfn & ~(PTRS_PER_PTE - 1); } @@ -892,7 +922,6 @@ retry: kvm_set_pfn_dirty(pfn); } - kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); out: srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -908,7 +937,8 @@ int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) return ret; /* Invalidate this entry in the TLB */ - kvm_flush_tlb_gpa(vcpu, gpa); + vcpu->arch.flush_gpa = gpa; + kvm_make_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); return 0; } diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c index 02535df6b51f..ebdbe9264e9c 100644 --- a/arch/loongarch/kvm/tlb.c +++ b/arch/loongarch/kvm/tlb.c @@ -23,10 +23,7 @@ void kvm_flush_tlb_all(void) void 
kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa) { - unsigned long flags; - - local_irq_save(flags); + lockdep_assert_irqs_disabled(); gpa &= (PAGE_MASK << 1); invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa); - local_irq_restore(flags); } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 9e8030d45129..bdfb1a910e27 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -31,6 +31,50 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + u32 version; + u64 steal; + gpa_t gpa; + struct kvm_memslots *slots; + struct kvm_steal_time __user *st; + struct gfn_to_hva_cache *ghc; + + ghc = &vcpu->arch.st.cache; + gpa = vcpu->arch.st.guest_addr; + if (!(gpa & KVM_STEAL_PHYS_VALID)) + return; + + gpa &= KVM_STEAL_PHYS_MASK; + slots = kvm_memslots(vcpu->kvm); + if (slots->generation != ghc->generation || gpa != ghc->gpa) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { + ghc->gpa = INVALID_GPA; + return; + } + } + + st = (struct kvm_steal_time __user *)ghc->hva; + unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; /* first time write, random junk */ + + version += 1; + unsafe_put_user(version, &st->version, out); + smp_wmb(); + + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); + + smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); +out: + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); +} + /* * kvm_check_requests - check and handle pending vCPU requests * @@ -48,9 +92,22 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return RESUME_HOST; + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) + 
kvm_update_stolen_time(vcpu); + return RESUME_GUEST; } +static void kvm_late_check_requests(struct kvm_vcpu *vcpu) +{ + lockdep_assert_irqs_disabled(); + if (kvm_check_request(KVM_REQ_TLB_FLUSH_GPA, vcpu)) + if (vcpu->arch.flush_gpa != INVALID_GPA) { + kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa); + vcpu->arch.flush_gpa = INVALID_GPA; + } +} + /* * Check and handle pending signal and vCPU requests etc * Run with irq enabled and preempt enabled @@ -101,6 +158,13 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) /* Make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); kvm_check_vpid(vcpu); + + /* + * Called after function kvm_check_vpid() + * Since it updates CSR.GSTAT used by kvm_flush_tlb_gpa(), + * and it may also clear KVM_REQ_TLB_FLUSH_GPA pending bit + */ + kvm_late_check_requests(vcpu); vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY); /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; @@ -354,6 +418,17 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) return -EINVAL; if (id == LOONGARCH_CSR_ESTAT) { + preempt_disable(); + vcpu_load(vcpu); + /* + * Sync pending interrupts into ESTAT so that interrupt + * remains during VM migration stage + */ + kvm_deliver_intr(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; + vcpu_put(vcpu); + preempt_enable(); + /* ESTAT IP0~IP7 get from GINTC */ gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); @@ -662,6 +737,16 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + return 0; +} + static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { 
@@ -671,6 +756,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_has_attr(vcpu, attr); + break; default: break; } @@ -678,7 +766,7 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } -static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, +static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int ret = 0; @@ -694,6 +782,23 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, return ret; } +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 gpa; + u64 __user *user = (u64 __user *)attr->addr; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + gpa = vcpu->arch.st.guest_addr; + if (put_user(gpa, user)) + return -EFAULT; + + return 0; +} + static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -701,7 +806,10 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_LOONGARCH_VCPU_CPUCFG: - ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + ret = kvm_loongarch_cpucfg_get_attr(vcpu, attr); + break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_get_attr(vcpu, attr); break; default: break; @@ -716,6 +824,43 @@ static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int idx, ret = 0; + u64 gpa, __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + if (get_user(gpa, user)) + return -EFAULT; + + if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return -EINVAL; 
+ + if (!(gpa & KVM_STEAL_PHYS_VALID)) { + vcpu->arch.st.guest_addr = gpa; + return 0; + } + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) { + vcpu->arch.st.guest_addr = gpa; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + } + + return ret; +} + static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -725,6 +870,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, case KVM_LOONGARCH_VCPU_CPUCFG: ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr); break; + case KVM_LOONGARCH_VCPU_PVTIME_CTRL: + ret = kvm_loongarch_pvtime_set_attr(vcpu, attr); + break; default: break; } @@ -994,6 +1142,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct loongarch_csrs *csr; vcpu->arch.vpid = 0; + vcpu->arch.flush_gpa = INVALID_GPA; hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); vcpu->arch.swtimer.function = kvm_swtimer_wakeup; @@ -1084,6 +1233,7 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) |