summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-06-10 10:53:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-06-10 10:53:14 -0700
commit7908a9e5fc3f9a679b1777ed231a03636c068446 (patch)
tree8d11c58a2b2550095f0945547e520062466b69f0
parent8fade6aff706b2ae3f02864b4023d34b002cd226 (diff)
parent07dc7263b99e4ddad2b4c69765a428ccb7d48938 (diff)
downloadlwn-7908a9e5fc3f9a679b1777ed231a03636c068446.tar.gz
lwn-7908a9e5fc3f9a679b1777ed231a03636c068446.zip
Merge branch 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: read apic->irr with ioapic lock held KVM: ia64: Add missing spin_unlock in kvm_arch_hardware_enable() KVM: Fix order passed to iommu_unmap KVM: MMU: Remove user access when allowing kernel access to gpte.w=0 page KVM: MMU: invalidate and flush on spte small->large page size change KVM: SVM: Implement workaround for Erratum 383 KVM: SVM: Handle MCEs early in the vmexit process KVM: powerpc: fix init/exit annotation
-rw-r--r--arch/ia64/kvm/kvm-ia64.c1
-rw-r--r--arch/powerpc/kvm/e500.c2
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/kvm/mmu.c5
-rw-r--r--arch/x86/kvm/svm.c96
-rw-r--r--virt/kvm/ioapic.c3
-rw-r--r--virt/kvm/iommu.c2
7 files changed, 106 insertions, 4 deletions
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d5f4e9161201..21b701374f72 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -144,6 +144,7 @@ int kvm_arch_hardware_enable(void *garbage)
VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
if (status != 0) {
+ spin_unlock(&vp_lock);
printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
return -EINVAL;
}
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index bc2b4004eb26..e8a00b0c4449 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -164,7 +164,7 @@ static int __init kvmppc_e500_init(void)
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
}
-static void __init kvmppc_e500_exit(void)
+static void __exit kvmppc_e500_exit(void)
{
kvmppc_booke_exit();
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b49d8ca228f6..8c7ae4318629 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -110,6 +110,7 @@
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81563e76e28f..a6f695d76928 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_WRITABLE_MASK;
+ if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK))
+ spte &= ~PT_USER_MASK;
+
/*
* Optimization: for pte sync, if spte was writable the hash
* lookup is unnecessary (and expensive). Write protection
@@ -1870,6 +1873,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
child = page_header(pte & PT64_BASE_ADDR_MASK);
mmu_page_remove_parent_pte(child, sptep);
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*sptep), pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 96dc232bfc56..ce438e0fdd26 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
+#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/virtext.h>
@@ -56,6 +57,8 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+static bool erratum_383_found __read_mostly;
+
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
svm->vmcb->control.event_inj_err = error_code;
}
+static void svm_init_erratum_383(void)
+{
+ u32 low, high;
+ int err;
+ u64 val;
+
+ /* Only Fam10h is affected */
+ if (boot_cpu_data.x86 != 0x10)
+ return;
+
+ /* Use _safe variants to not break nested virtualization */
+ val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
+ if (err)
+ return;
+
+ val |= (1ULL << 47);
+
+ low = lower_32_bits(val);
+ high = upper_32_bits(val);
+
+ native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+
+ erratum_383_found = true;
+}
+
static int has_svm(void)
{
const char *msg;
@@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage)
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+ svm_init_erratum_383();
+
return 0;
}
@@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm)
return 1;
}
-static int mc_interception(struct vcpu_svm *svm)
+static bool is_erratum_383(void)
{
+ int err, i;
+ u64 value;
+
+ if (!erratum_383_found)
+ return false;
+
+ value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
+ if (err)
+ return false;
+
+ /* Bit 62 may or may not be set for this mce */
+ value &= ~(1ULL << 62);
+
+ if (value != 0xb600000000010015ULL)
+ return false;
+
+ /* Clear MCi_STATUS registers */
+ for (i = 0; i < 6; ++i)
+ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
+
+ value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
+ if (!err) {
+ u32 low, high;
+
+ value &= ~(1ULL << 2);
+ low = lower_32_bits(value);
+ high = upper_32_bits(value);
+
+ native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+ }
+
+ /* Flush tlb to evict multi-match entries */
+ __flush_tlb_all();
+
+ return true;
+}
+
+static void svm_handle_mce(struct vcpu_svm *svm)
+{
+ if (is_erratum_383()) {
+ /*
+ * Erratum 383 triggered. Guest state is corrupt so kill the
+ * guest.
+ */
+ pr_err("KVM: Guest triggered AMD Erratum 383\n");
+
+ set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
+
+ return;
+ }
+
/*
* On an #MC intercept the MCE handler is not called automatically in
* the host. So do it by hand here.
@@ -1420,6 +1501,11 @@ static int mc_interception(struct vcpu_svm *svm)
"int $0x12\n");
/* not sure if we ever come back to this point */
+ return;
+}
+
+static int mc_interception(struct vcpu_svm *svm)
+{
return 1;
}
@@ -3088,6 +3174,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
}
+
+ /*
+ * We need to handle MC intercepts here before the vcpu has a chance to
+ * change the physical cpu
+ */
+ if (unlikely(svm->vmcb->control.exit_code ==
+ SVM_EXIT_EXCP_BASE + MC_VECTOR))
+ svm_handle_mce(svm);
}
#undef R
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c79c1d76d0c..3500dee9cf2b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -192,12 +192,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
- u32 old_irr = ioapic->irr;
+ u32 old_irr;
u32 mask = 1 << irq;
union kvm_ioapic_redirect_entry entry;
int ret = 1;
spin_lock(&ioapic->lock);
+ old_irr = ioapic->irr;
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d2f06be63354..96048ee9e39e 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -271,7 +271,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
pfn = phys >> PAGE_SHIFT;
/* Unmap address from IO address space */
- order = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+ order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
unmap_pages = 1ULL << order;
/* Unpin all pages we just unmapped to not leak any memory */