diff options
author | Alexander Graf <agraf@suse.de> | 2014-07-11 02:58:58 +0200 |
---|---|---|
committer | Alexander Graf <agraf@suse.de> | 2014-07-28 15:23:10 +0200 |
commit | c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7 (patch) | |
tree | 01497e850838a4ac13a43b127b3b81413f1e9629 /arch/powerpc/kvm | |
parent | 2e27ecc961044a2c5c05a4283888352961886a87 (diff) | |
download | lwn-c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7.tar.gz lwn-c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7.zip |
KVM: PPC: Book3S: Add hack for split real mode
Today we handle split real mode by mapping both instruction and data faults
into a special virtual address space that only exists during the split mode
phase.
This is good enough to catch 32bit Linux guests that use split real mode for
copy_from/to_user. In this case we're always prefixed with 0xc0000000 for our
instruction pointer and can map the user space process freely below there.
However, that approach fails when we're running KVM inside of KVM. Here the 1st
level last_inst reader may well be in the same virtual page as a 2nd level
interrupt handler.
It also fails when running Mac OS X guests. Here we have a 4G/4G split, so a
kernel copy_from/to_user implementation can easily overlap with user space
addresses.
The architecturally correct way to fix this would be to implement an instruction
interpreter in KVM that kicks in whenever we go into split real mode. This
interpreter however would not receive a great amount of testing and be a lot of
bloat for a reasonably isolated corner case.
So I went back to the drawing board and tried to come up with a way to make
split real mode work with a single flat address space. And then I realized that
we could get away with the same trick that makes it work for Linux:
Whenever we see an instruction address during split real mode that may collide,
we just move it higher up the virtual address space to a place that hopefully
does not collide (keep your fingers crossed!).
That approach does work surprisingly well. I am able to successfully run
Mac OS X guests with KVM and QEMU (no split real mode hacks like MOL) when I
apply a tiny timing probe hack to QEMU. I'd say this is a win over even more
broken split real mode :).
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 19 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 48 |
2 files changed, 67 insertions, 0 deletions
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9624c5644103..1d137642f1a2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -384,6 +396,13 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 15fd6c25179c..6125f60a68f8 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -81,6 +110,9 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -95,6 +127,9 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); @@ -322,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -522,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -886,6 +931,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ |