From 4473b570a7ebb502f63f292ccfba7df622e5fdd3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 18 Aug 2014 17:50:28 +0800 Subject: KVM: x86: drop fpu_activate hook The only user of the fpu_activate hook was dropped in commit 2d04a05bd7e9 (KVM: x86 emulator: emulate CLTS internally, 2011-04-20). vmx_fpu_activate and svm_fpu_activate are still called on #NM (and for Intel CLTS), but never from common code; hence, there's no need for a hook. Reviewed-by: Yang Zhang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..4bda61b582e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -710,7 +710,6 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From ae97a3b818324b92b5b9cc885c63c3f4bd46ee9d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 21 Aug 2014 18:08:06 +0200 Subject: KVM: x86: introduce sched_in to kvm_x86_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sched_in preempt notifier is available for x86, allow its use in specific virtualization technlogies as well. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 1 + 4 files changed, 15 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4bda61b582e6..ac0f90e26a0b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -771,6 +771,8 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1f49c867e72e..1703aab84a6d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4305,6 +4305,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } +static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4405,6 +4409,8 @@ static struct kvm_x86_ops svm_x86_ops = { .check_intercept = svm_check_intercept, .handle_external_intr = svm_handle_external_intr, + + .sched_in = svm_sched_in, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 286c2835e25c..7c26533e149c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8848,6 +8848,10 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -8952,6 +8956,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .mpx_supported = vmx_mpx_supported, .check_nested_events = vmx_check_nested_events, + + .sched_in = vmx_sched_in, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d43dc7bb906..575d3fc67e7e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7173,6 +7173,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { + kvm_x86_ops->sched_in(vcpu, cpu); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 5 +++-- arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 34 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..4843397b812c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..766147baae0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..fccc09d04649 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..b93bc80ed7e7 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..0e597283c5c6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..d71291d3fb6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include @@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ac0f90e26a0b..567fface45f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 16 files changed, 27 insertions(+), 27 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index aea259e9431f..032a8538318a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 132bb0d9c5ad..005a7b5fd0aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b5045e3e05f8..be9970a59497 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5e14dcaf844e..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 0b24d6622ec1..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0ec7490d70bd..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237cc0cc80a2..604000882352 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,7 +682,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c3fc085207..da505237a664 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f6dd90684b97..a76a124dff48 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b8fe1ae777db..628e992eeded 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 567fface45f8..73e4149eda33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,8 +661,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7cd230e55118..8ef704037370 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d70550d0bcff..671ca5edc709 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c10408ef9ab1..a375dfc42f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1d03967def40..7176929a4cda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) -- cgit v1.2.3 From 56f17dd3fbc44adcdbc3340fe3988ddb833a47a7 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 18 Aug 2014 15:46:07 -0700 Subject: kvm: x86: fix stale mmio cache bug The following events can lead to an incorrect KVM_EXIT_MMIO bubbling up to userspace: (1) Guest accesses gpa X without a memory slot. The gfn is cached in struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets the SPTE write-execute-noread so that future accesses cause EPT_MISCONFIGs. (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION covering the page just accessed. (3) Guest attempts to read or write to gpa X again. On Intel, this generates an EPT_MISCONFIG. The memory slot generation number that was incremented in (2) would normally take care of this but we fast path mmio faults through quickly_check_mmio_pf(), which only checks the per-vcpu mmio cache. Since we hit the cache, KVM passes a KVM_EXIT_MMIO up to userspace. This patch fixes the issue by using the memslot generation number to validate the mmio cache. Cc: stable@vger.kernel.org Signed-off-by: David Matlack [xiaoguangrong: adjust the code to make it simpler for stable-tree fix.] Signed-off-by: Xiao Guangrong Reviewed-by: David Matlack Reviewed-by: Xiao Guangrong Tested-by: David Matlack Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.h | 20 +++++++++++++++----- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 73e4149eda33..08cc299ec6f4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -477,6 +477,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 96515957ba82..1cd2a5fbde07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3162,7 +3162,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 306a1b77581f..985fb2c006fa 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} + +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. */ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; -- cgit v1.2.3 From ef54bcfeea6c8b04e2a4f9396e16d88558aa2eee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Sep 2014 19:46:15 +0200 Subject: KVM: x86: skip writeback on injection of nested exception If a nested page fault happens during emulation, we will inject a vmexit, not a page fault. However because writeback happens after the injection, we will write ctxt->eip from L2 into the L1 EIP. We do not write back if an instruction caused an interception vmexit---do the same for page faults. Suggested-by: Gleb Natapov Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 08cc299ec6f4..c9896518e54d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -893,7 +893,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); static inline int __kvm_irq_line_state(unsigned long *irq_state, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4ed85e07a01..354194671902 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { if (mmu_is_nested(vcpu) && !fault->nested_page_fault) vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); else vcpu->arch.mmu.inject_page_fault(vcpu, fault); + + return fault->nested_page_fault; } void kvm_inject_nmi(struct kvm_vcpu *vcpu) @@ -4929,16 +4931,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) } } -static void inject_emulated_exception(struct kvm_vcpu *vcpu) +static bool inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; if (ctxt->exception.vector == PF_VECTOR) - kvm_propagate_fault(vcpu, &ctxt->exception); - else if (ctxt->exception.error_code_valid) + return kvm_propagate_fault(vcpu, &ctxt->exception); + + if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, ctxt->exception.error_code); else kvm_queue_exception(vcpu, ctxt->exception.vector); + return false; } static void init_emulate_ctxt(struct kvm_vcpu *vcpu) @@ -5300,8 +5304,9 @@ restart: } if (ctxt->have_exception) { - inject_emulated_exception(vcpu); r = EMULATE_DONE; + if (inject_emulated_exception(vcpu)) + return r; } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) { /* FIXME: return into emulator if single-stepping. */ -- cgit v1.2.3 From 54987b7afa902e886b3a751c056c2a4d4701020e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Sep 2014 13:23:06 +0200 Subject: KVM: x86: propagate exception from permission checks on the nested page fault Currently, if a permission error happens during the translation of the final GPA to HPA, walk_addr_generic returns 0 but does not fill in walker->fault. To avoid this, add an x86_exception* argument to the translate_gpa function, and let it fill in walker->fault. The nested_page_fault field will be true, since the walk_mmu is the nested_mmu and translate_gpu instead operates on the "outer" (NPT) instance. Reported-by: Valentine Sinitsyn Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 9 ++++++--- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/paging_tmpl.h | 7 ++++--- arch/x86/kvm/x86.c | 9 +++++---- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c9896518e54d..2dbde3b7446c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -262,7 +262,8 @@ struct kvm_mmu { struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); @@ -923,7 +924,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, @@ -943,7 +945,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { return gpa; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5b93a597e0c8..76398fe15df2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3200,7 +3200,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, { if (exception) exception->error_code = 0; - return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); + return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index df1a044d92de..0ab6c65a2821 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -321,7 +321,8 @@ retry_walk: walker->pte_gpa[walker->level - 1] = pte_gpa; real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), - PFERR_USER_MASK|PFERR_WRITE_MASK); + PFERR_USER_MASK|PFERR_WRITE_MASK, + &walker->fault); /* * FIXME: This can happen if emulation (for of an INS/OUTS @@ -334,7 +335,7 @@ retry_walk: * fields. */ if (unlikely(real_gfn == UNMAPPED_GVA)) - goto error; + return 0; real_gfn = gpa_to_gfn(real_gfn); @@ -376,7 +377,7 @@ retry_walk: if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); if (real_gpa == UNMAPPED_GVA) return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 354194671902..7b25aa2725f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -459,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t ngfn, void *data, int offset, int len, u32 access) { + struct x86_exception exception; gfn_t real_gfn; gpa_t ngpa; ngpa = gfn_to_gpa(ngfn); - real_gfn = mmu->translate_gpa(vcpu, ngpa, access); + real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); if (real_gfn == UNMAPPED_GVA) return -EFAULT; @@ -4065,16 +4066,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, kvm_x86_ops->get_segment(vcpu, var, seg); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { gpa_t t_gpa; - struct x86_exception exception; BUG_ON(!mmu_is_nested(vcpu)); /* NPT walks are always user-walks */ access |= PFERR_USER_MASK; - t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); + t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); return t_gpa; } -- cgit v1.2.3 From a255d4795f83cf3e6a1c7d5ab998392d9413298c Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 16 Sep 2014 18:41:58 +0800 Subject: kvm: Remove ept_identity_pagetable from struct kvm_arch. kvm_arch->ept_identity_pagetable holds the ept identity pagetable page. But it is never used to refer to the page at all. In vcpu initialization, it indicates two things: 1. indicates if ept page is allocated 2. indicates if a memory slot for identity page is initialized Actually, kvm_arch->ept_identity_pagetable_done is enough to tell if the ept identity pagetable is initialized. So we can remove ept_identity_pagetable. NOTE: In the original code, ept identity pagetable page is pinned in memroy. As a result, it cannot be migrated/hot-removed. After this patch, since kvm_arch->ept_identity_pagetable is removed, ept identity pagetable page is no longer pinned in memory. And it can be migrated/hot-removed. Signed-off-by: Tang Chen Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/vmx.c | 47 +++++++++++++++++++---------------------- arch/x86/kvm/x86.c | 2 -- 3 files changed, 22 insertions(+), 28 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2dbde3b7446c..028df8dc538e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -578,7 +578,6 @@ struct kvm_arch { gpa_t wall_clock; - struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5db47db6574b..5de0d7b1d8a9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -767,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); +static int alloc_identity_pagetable(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -3962,21 +3963,27 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { - int i, idx, r, ret; + int i, idx, r, ret = 0; pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) return 1; - if (unlikely(!kvm->arch.ept_identity_pagetable)) { - printk(KERN_ERR "EPT: identity-mapping pagetable " - "haven't been allocated!\n"); - return 0; + + /* Protect kvm->arch.ept_identity_pagetable_done. */ + mutex_lock(&kvm->slots_lock); + + if (likely(kvm->arch.ept_identity_pagetable_done)) { + ret = 1; + goto out2; } - if (likely(kvm->arch.ept_identity_pagetable_done)) - return 1; - ret = 0; + identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; + + r = alloc_identity_pagetable(kvm); + if (r) + goto out2; + idx = srcu_read_lock(&kvm->srcu); r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r < 0) @@ -3994,6 +4001,9 @@ static int init_rmode_identity_map(struct kvm *kvm) ret = 1; out: srcu_read_unlock(&kvm->srcu, idx); + +out2: + mutex_unlock(&kvm->slots_lock); return ret; } @@ -4043,31 +4053,20 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { - struct page *page; + /* Called with kvm->slots_lock held. */ + struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - mutex_lock(&kvm->slots_lock); - if (kvm->arch.ept_identity_pagetable) - goto out; + BUG_ON(kvm->arch.ept_identity_pagetable_done); + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); - if (r) - goto out; - - page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); - if (is_error_page(page)) { - r = -EFAULT; - goto out; - } - kvm->arch.ept_identity_pagetable = page; -out: - mutex_unlock(&kvm->slots_lock); return r; } @@ -7758,8 +7757,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; err = -ENOMEM; - if (alloc_identity_pagetable(kvm) != 0) - goto free_vmcs; if (!init_rmode_identity_map(kvm)) goto free_vmcs; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 68660b31457e..2d7f65daa8d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7273,8 +7273,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); if (kvm->arch.apic_access_page) put_page(kvm->arch.apic_access_page); - if (kvm->arch.ept_identity_pagetable) - put_page(kvm->arch.ept_identity_pagetable); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -- cgit v1.2.3 From 77c3913b74212a86027d311f5e81625736816620 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 18 Sep 2014 12:38:37 -0400 Subject: KVM: x86: directly use kvm_make_request again A one-line wrapper around kvm_make_request is not particularly useful. Replace kvm_mmu_flush_tlb() with kvm_make_request(). Signed-off-by: Liang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/mmu.c | 16 +++++----------- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 028df8dc538e..eeeb573fcf6f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -914,7 +914,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); int fx_init(struct kvm_vcpu *vcpu); -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 90f0aceea4cc..045d592af1dd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1743,7 +1743,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1796,7 +1796,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); if (flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } struct mmu_page_path { @@ -2530,7 +2530,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, true, host_writable)) { if (write_fault) *emulate = 1; - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (unlikely(is_mmio_spte(*sptep) && emulate)) @@ -3444,12 +3444,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) -{ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); - void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); @@ -3964,7 +3958,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, if (remote_flush) kvm_flush_remote_tlbs(vcpu->kvm); else if (local_flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, @@ -4225,7 +4219,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { vcpu->arch.mmu.invlpg(vcpu, gva); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2ab504791190..ec389a6dc82a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6711,7 +6711,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65b97d5567fe..47678d4e229a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -729,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } -- cgit v1.2.3 From c1118b3602c2329671ad5ec8bdf8e374323d6343 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Sep 2014 13:17:48 +0200 Subject: x86: kvm: use alternatives for VMCALL vs. VMMCALL if kernel text is read-only On x86_64, kernel text mappings are mapped read-only with CONFIG_DEBUG_RODATA. In that case, KVM will fail to patch VMCALL instructions to VMMCALL as required on AMD processors. The failure mode is currently a divide-by-zero exception, which obviously is a KVM bug that has to be fixed. However, picking the right instruction between VMCALL and VMMCALL will be faster and will help if you cannot upgrade the hypervisor. Reported-by: Chris Webb Tested-by: Chris Webb Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/kvm_para.h | 10 ++++++++-- arch/x86/kernel/cpu/amd.c | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb9b258d60e7..2075e6c34c78 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -202,6 +202,7 @@ #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include +#include #include extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -- cgit v1.2.3 From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Mon, 22 Sep 2014 14:54:42 -0700 Subject: kvm: Fix page ageing bugs 1. We were calling clear_flush_young_notify in unmap_one, but we are within an mmu notifier invalidate range scope. The spte exists no more (due to range_start) and the accessed bit info has already been propagated (due to kvm_pfn_set_accessed). Simply call clear_flush_young. 2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a collection of PTEs by the secondary MMU (e.g. during log-dirty). This required expanding the interface of the clear_flush_young mmu notifier, so a lot of code has been trivially touched. 3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the access bit by blowing the spte. This requires proper synchronizing with MMU notifier consumers, like every other removal of spte's does. Signed-off-by: Andres Lagar-Cavilla Acked-by: Rik van Riel Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/book3s.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++--------------- drivers/iommu/amd_iommu_v2.c | 6 ++++-- include/linux/mmu_notifier.h | 24 ++++++++++++++++------- mm/mmu_notifier.c | 5 +++-- mm/rmap.c | 6 +++++- virt/kvm/kvm_main.c | 5 +++-- 16 files changed, 71 insertions(+), 41 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 032a8538318a..8c3f7eb62b54 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index be9970a59497..a3c671b3acc9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm, void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 604000882352..d329bc5543a2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -56,7 +56,7 @@ extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..d4a92d7cea6a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -243,7 +243,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..c16cfbfeb781 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 72c20bb16d26..81460c5359c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..852fcd8951c4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..b1f3f630315e 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index eeeb573fcf6f..763d273cab1d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 47d534066325..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, struct rmap_iterator uninitialized_var(iter); int young = 0; - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data); - goto out; - } + BUG_ON(!shadow_accessed_mask); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } -out: trace_kvm_age_page(gfn, level, slot, young); return young; } @@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) kvm_flush_remote_tlbs(vcpu->kvm); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); + /* + * In case of absence of EPT Access and Dirty Bits supports, + * emulate the accessed bit for EPT, by checking if this page has + * an EPT mapping, and clearing it if it does. On the next access, + * a new EPT mapping will be established. + * This has some overhead, but not as much as the cost of swapping + * out actively used pages or breaking up actively used hugepages. + */ + if (!shadow_accessed_mask) { + /* + * We are holding the kvm->mmu_lock, and we are blowing up + * shadow PTEs. MMU notifier consumers need to be kept at bay. + * This is correct as long as we don't decouple the mmu_lock + * protected regions (like invalidate_range_start|end does). + */ + kvm->mmu_notifier_seq++; + return kvm_handle_hva_range(kvm, start, end, 0, + kvm_unmap_rmapp); + } + + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5f578e850fc5..90d734bbf467 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn, static int mn_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { - __mn_flush_page(mn, address); + for (; start < end; start += PAGE_SIZE) + __mn_flush_page(mn, start); return 0; } diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 950813b1eb36..2c8da9825fe3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm) * existed or not. */ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct mmu_notifier *mn; int young = 0, id; @@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->clear_flush_young) - young |= mn->ops->clear_flush_young(mn, mm, address); + young |= mn->ops->clear_flush_young(mn, mm, start, end); } srcu_read_unlock(&srcu, id); diff --git a/mm/rmap.c b/mm/rmap.c index 3e8491c504f8..bc74e0012809 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, continue; /* don't unmap */ } - if (ptep_clear_flush_young_notify(vma, address, pte)) + /* + * No need for _notify because we're within an + * mmu_notifier_invalidate_range_ {start|end} scope. + */ + if (ptep_clear_flush_young(vma, address, pte)) continue; /* Nuke the page table entry. */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ff42b11d2b9c..0316314d48f4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -369,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -377,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); + young = kvm_age_hva(kvm, start, end); if (young) kvm_flush_remote_tlbs(kvm); -- cgit v1.2.3 From fe71557afbec641fee73711e40602bed37f6f33b Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:57 +0800 Subject: kvm: Add arch specific mmu notifier for page invalidation This will be used to let the guest run while the APIC access page is not pinned. Because subsequent patches will fill in the function for x86, place the (still empty) x86 implementation in the x86.c file instead of adding an inline function in kvm_host.h. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/powerpc/include/asm/kvm_host.h | 5 +++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 5 +++++ virt/kvm/kvm_main.c | 3 +++ 6 files changed, 25 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8c3f7eb62b54..155497c2b4da 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -181,6 +181,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a3c671b3acc9..992d9da88119 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -191,6 +191,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d329bc5543a2..2cf6c1587d43 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -60,6 +60,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 763d273cab1d..022c356e0fed 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1044,6 +1044,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6152aa0da7dd..142569e6f8f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6026,6 +6026,11 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + /* * Returns 1 to let __vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b8ca365932a..3f16f569169e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -296,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); + + kvm_arch_mmu_notifier_invalidate_page(kvm, address); + srcu_read_unlock(&kvm->srcu, idx); } -- cgit v1.2.3 From 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:54 +0800 Subject: kvm: x86: Add request bit to reload APIC access page address Currently, the APIC access page is pinned by KVM for the entire life of the guest. We want to make it migratable in order to make memory hot-unplug available for machines that run KVM. This patch prepares to handle this in generic code, through a new request bit (that will be set by the MMU notifier) and a new hook that is called whenever the request bit is processed. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 14 ++++++++++++++ include/linux/kvm_host.h | 1 + 3 files changed, 17 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 022c356e0fed..60f9d73c6282 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -736,6 +736,7 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -1044,6 +1045,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 142569e6f8f9..c1412f5d93db 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6026,6 +6026,18 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +{ + if (!kvm_x86_ops->set_apic_access_page_addr) + return; + + vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, + APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, + page_to_phys(vcpu->kvm->arch.apic_access_page)); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); + void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { @@ -6091,6 +6103,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) + kvm_vcpu_reload_apic_access_page(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 12c591fc2571..d594f9f34429 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From c24ae0dcd3e8695efa43e71704d1fc4bc7e29e9b Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:58 +0800 Subject: kvm: x86: Unpin and remove kvm_arch->apic_access_page In order to make the APIC access page migratable, stop pinning it in memory. And because the APIC access page is not pinned in memory, we can remove kvm_arch->apic_access_page. When we need to write its physical address into vmcs, we use gfn_to_page() to get its page struct, which is needed to call page_to_phys(); the page is then immediately unpinned. Suggested-by: Gleb Natapov Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 9 +++++++-- arch/x86/kvm/x86.c | 22 ++++++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 60f9d73c6282..7d603a71ab3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,7 +574,7 @@ struct kvm_arch { struct kvm_apic_map *apic_map; unsigned int tss_addr; - struct page *apic_access_page; + bool apic_access_page_done; gpa_t wall_clock; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 881d266eb3ae..04fa1b8298c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4033,7 +4033,7 @@ static int alloc_apic_access_page(struct kvm *kvm) int r = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page) + if (kvm->arch.apic_access_page_done) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; @@ -4049,7 +4049,12 @@ static int alloc_apic_access_page(struct kvm *kvm) goto out; } - kvm->arch.apic_access_page = page; + /* + * Do not pin the page in memory, so that memory hot-unplug + * is able to migrate it. + */ + put_page(page); + kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); return r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1412f5d93db..6857257f3810 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6028,19 +6028,31 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { + struct page *page = NULL; + if (!kvm_x86_ops->set_apic_access_page_addr) return; - vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, - APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - kvm_x86_ops->set_apic_access_page_addr(vcpu, - page_to_phys(vcpu->kvm->arch.apic_access_page)); + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. + */ + put_page(page); } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } /* @@ -7297,8 +7309,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); - if (kvm->arch.apic_access_page) - put_page(kvm->arch.apic_access_page); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -- cgit v1.2.3