From cfac57847a67c4903f34a77e971521531bbc7c77 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:40 +0000 Subject: powerpc/booke: Provide exception macros with interrupt name DO_KVM will need to identify the particular exception type. There is an existing set of arbitrary numbers that Linux passes, but it's an undocumented mess that sort of corresponds to server/classic exception vectors but not really. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/head_44x.S | 23 +++++++++------- arch/powerpc/kernel/head_booke.h | 41 +++++++++++++++------------- arch/powerpc/kernel/head_fsl_booke.S | 52 ++++++++++++++++++++++-------------- 3 files changed, 68 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 7dd2981bcc50..d1192c577ed3 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -248,10 +248,11 @@ _ENTRY(_start); interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) /* Data Storage Interrupt */ @@ -261,7 +262,8 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ + do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -273,29 +275,32 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ + 
FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 0e4175388f47..51fd0724e095 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -2,6 +2,8 @@ #define __HEAD_BOOKE_H__ #include /* for STACK_FRAME_REGS_MARKER */ +#include + /* * Macros used for common Book-e exception handling */ @@ -28,7 +30,7 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) -#define NORMAL_EXCEPTION_PROLOG \ +#define NORMAL_EXCEPTION_PROLOG(intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ @@ -113,7 +115,7 @@ * registers as the normal prolog above. Instead we use a portion of the * critical/machine check exception stack at low physical addresses. 
*/ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -162,12 +164,13 @@ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -#define CRITICAL_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define CRITICAL_EXCEPTION_PROLOG(intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ + SPRN_MCSRR0, SPRN_MCSRR1) /* * Exception vectors. @@ -181,16 +184,16 @@ label: .long func; \ .long ret_from_except_full -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ +#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ + START_EXCEPTION(label); \ + CRITICAL_EXCEPTION_PROLOG(intno); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ NOCOPY, crit_transfer_to_handler, \ ret_from_crit_exc) @@ -302,7 +305,7 @@ label: #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG; \ + CRITICAL_EXCEPTION_PROLOG(DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -355,7 +358,7 @@ label: #define DATA_STORAGE_EXCEPTION \ 
START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ @@ -363,7 +366,7 @@ label: #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ @@ -372,7 +375,7 @@ label: #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -380,7 +383,7 @@ label: #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -388,7 +391,7 @@ label: #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -396,7 +399,7 @@ label: #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 28e62598d0e8..7c406dd9fea6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -301,19 +301,20 @@ _ENTRY(__early_start) interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, 
CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) #else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -328,7 +329,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -342,32 +343,36 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ + unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x3100, FIT, 
FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -538,31 +543,38 @@ interrupt_base: #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) bne load_up_spe addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \ + SPEFloatingPointException, EXC_XFER_EE); /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + SPEFloatingPointRoundException, EXC_XFER_EE) #else - EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \ + unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) + EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ + performance_monitor_exception, EXC_XFER_STD) - EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, 
DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) - CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception) + CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ + CriticalDoorbell, unknown_exception) /* Debug Interrupt */ DEBUG_DEBUG_EXCEPTION -- cgit v1.2.3 From d30f6e480055e5be12e7a03fd11ea912a451daa5 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:43 +0000 Subject: KVM: PPC: booke: category E.HV (GS-mode) support Chips such as e500mc that implement category E.HV in Power ISA 2.06 provide hardware virtualization features, including a new MSR mode for guest state. The guest OS can perform many operations without trapping into the hypervisor, including transitions to and from guest userspace. Since we can use SRR1[GS] to reliably tell whether an exception came from guest state, instead of messing around with IVPR, we use DO_KVM similarly to book3s. Current issues include: - Machine checks from guest state are not routed to the host handler. - The guest can cause a host oops by executing an emulated instruction in a page that lacks read permission. Existing e500/4xx support has the same problem. Includes work by Ashish Kalra, Varun Sethi, and Liu Yu.
Signed-off-by: Scott Wood [agraf: remove pt_regs usage] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/dbell.h | 1 + arch/powerpc/include/asm/kvm_asm.h | 8 + arch/powerpc/include/asm/kvm_booke_hv_asm.h | 49 +++ arch/powerpc/include/asm/kvm_host.h | 19 +- arch/powerpc/include/asm/kvm_ppc.h | 3 + arch/powerpc/include/asm/mmu-book3e.h | 6 + arch/powerpc/include/asm/processor.h | 3 + arch/powerpc/include/asm/reg.h | 2 + arch/powerpc/include/asm/reg_booke.h | 34 ++ arch/powerpc/kernel/asm-offsets.c | 15 +- arch/powerpc/kernel/head_booke.h | 28 +- arch/powerpc/kvm/Kconfig | 3 + arch/powerpc/kvm/booke.c | 309 ++++++++++++--- arch/powerpc/kvm/booke.h | 24 +- arch/powerpc/kvm/booke_emulate.c | 23 +- arch/powerpc/kvm/bookehv_interrupts.S | 587 ++++++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 5 + arch/powerpc/kvm/timing.h | 6 + 18 files changed, 1058 insertions(+), 67 deletions(-) create mode 100644 arch/powerpc/include/asm/kvm_booke_hv_asm.h create mode 100644 arch/powerpc/kvm/bookehv_interrupts.S (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index efa74ac44a35..d7365b01f0c4 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -19,6 +19,7 @@ #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) +#define PPC_DBELL_LPID(x) ((x) << (63 - 49)) enum ppc_dbell { PPC_DBELL = 0, /* doorbell */ PPC_DBELL_CRIT = 1, /* critical doorbell */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 7b1f0e0fc653..097815233284 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -48,6 +48,14 @@ #define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 +#define BOOKE_INTERRUPT_DOORBELL 36 +#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 + +/* booke_hv */ +#define 
BOOKE_INTERRUPT_GUEST_DBELL 38 +#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39 +#define BOOKE_INTERRUPT_HV_SYSCALL 40 +#define BOOKE_INTERRUPT_HV_PRIV 41 /* book3s */ diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h new file mode 100644 index 000000000000..30a600fa1b6a --- /dev/null +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -0,0 +1,49 @@ +/* + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef ASM_KVM_BOOKE_HV_ASM_H +#define ASM_KVM_BOOKE_HV_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * All exceptions from guest state must go through KVM + * (except for those which are delivered directly to the guest) -- + * there are no exceptions for which we fall through directly to + * the normal host handler. + * + * Expected inputs (normal exceptions): + * SCRATCH0 = saved r10 + * r10 = thread struct + * r11 = appropriate SRR1 variant (currently used as scratch) + * r13 = saved CR + * *(r10 + THREAD_NORMSAVE(0)) = saved r11 + * *(r10 + THREAD_NORMSAVE(2)) = saved r13 + * + * Expected inputs (crit/mcheck/debug exceptions): + * appropriate SCRATCH = saved r8 + * r8 = exception level stack frame + * r9 = *(r8 + _CCR) = saved CR + * r11 = appropriate SRR1 variant (currently used as scratch) + * *(r8 + GPR9) = saved r9 + * *(r8 + GPR10) = saved r10 (r10 not yet clobbered) + * *(r8 + GPR11) = saved r11 + */ +.macro DO_KVM intno srr1 +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, kvmppc_resume_\intno\()_\srr1 + b kvmppc_handler_\intno\()_\srr1 +kvmppc_resume_\intno\()_\srr1: +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif +.endm + +#endif /*__ASSEMBLY__ */ +#endif /* ASM_KVM_BOOKE_HV_ASM_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h index 5b81cbc43a42..e645623728fc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -106,6 +106,8 @@ struct kvm_vcpu_stat { u32 dec_exits; u32 ext_intr_exits; u32 halt_wakeup; + u32 dbell_exits; + u32 gdbell_exits; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; @@ -140,6 +142,7 @@ enum kvm_exit_types { EMULATED_TLBSX_EXITS, EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, + EMULATED_RFCI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -147,6 +150,8 @@ enum kvm_exit_types { FP_UNAVAIL, DEBUG_EXITS, TIMEINGUEST, + DBELL_EXITS, + GDBELL_EXITS, __NUMBER_OF_KVM_EXIT_TYPES }; @@ -217,10 +222,10 @@ struct kvm_arch_memory_slot { }; struct kvm_arch { + unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -345,6 +350,17 @@ struct kvm_vcpu_arch { u64 vsr[64]; #endif +#ifdef CONFIG_KVM_BOOKE_HV + u32 host_mas4; + u32 host_mas6; + u32 shadow_epcr; + u32 epcr; + u32 shadow_msrp; + u32 eplc; + u32 epsc; + u32 oldpir; +#endif + #ifdef CONFIG_PPC_BOOK3S /* For Gekko paired singles */ u32 qpr[32]; @@ -428,6 +444,7 @@ struct kvm_vcpu_arch { ulong queued_esr; u32 tlbcfg[4]; u32 mmucfg; + u32 epr; #endif gpa_t paddr_accessed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 731e920eda1e..e709975702a6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -139,6 +139,9 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvmppc_bookehv_init(void); +extern void kvmppc_bookehv_exit(void); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index cdb5421877e2..eeabcdbc30f7 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -104,6 +104,8 @@ #define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */ #define MAS4_TSIZED_SHIFT 7 +#define MAS5_SGS 0x80000000 + #define MAS6_SPID0 0x3FFF0000 #define MAS6_SPID1 0x00007FFE #define MAS6_ISIZE(x) MAS1_TSIZE(x) @@ -118,6 +120,10 @@ #define MAS7_RPN 0xFFFFFFFF +#define MAS8_TGS 0x80000000 /* Guest space */ +#define MAS8_VF 0x40000000 /* Virtualization Fault */ +#define MAS8_TLPID 0x000000ff + /* Bit definitions for MMUCFG */ #define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ #define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8e2d0371fe1e..2a25ab0f5896 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -243,6 +243,9 @@ struct thread_struct { #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *kvm_vcpu; +#endif #ifdef CONFIG_PPC64 unsigned long dscr; int dscr_inherit; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9d7f0fb69028..f0cb7f461b9d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -257,7 +257,9 @@ #define LPCR_LPES_SH 2 #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ +#ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMEER 0x151 /* Hardware m? 
enable error recovery */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index b86faa9107da..815e404f8c18 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -61,18 +61,30 @@ extern u32 booke_wdt_period; #define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ +#define SPRN_MSRP 0x137 /* MSR Protect Register */ #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ #define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */ #define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */ +#define SPRN_LPID 0x152 /* Logical Partition ID */ #define SPRN_MAS8 0x155 /* MMU Assist Register 8 */ #define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */ #define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */ #define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */ #define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */ #define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */ +#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */ +#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */ +#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */ +#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */ #define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */ #define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */ +#define SPRN_GSRR0 0x17A /* Guest SRR0 */ +#define SPRN_GSRR1 0x17B /* Guest SRR1 */ +#define SPRN_GEPR 0x17C /* Guest EPR */ +#define SPRN_GDEAR 0x17D /* Guest DEAR */ +#define SPRN_GPIR 0x17E /* Guest PIR */ +#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */ #define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */ #define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */ #define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */ @@ -93,6 +105,13 @@ extern u32 
booke_wdt_period; #define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ #define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ #define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ +#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */ +#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */ +#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */ +#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */ +#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */ +#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */ +#define SPRN_GIVPR 0x1BF /* Guest IVPR */ #define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ #define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ #define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ @@ -245,6 +264,10 @@ extern u32 booke_wdt_period; #define MCSR_LDG 0x00002000UL /* Guarded Load */ #define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */ #define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */ + +#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */ +#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */ +#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif #ifdef CONFIG_E200 @@ -599,6 +622,17 @@ extern u32 booke_wdt_period; #define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates * for hypervisor */ +/* Bit definitions for EPLC/EPSC */ +#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */ +#define EPC_EPR_SHIFT 31 +#define EPC_EAS 0x40000000 /* Address Space */ +#define EPC_EAS_SHIFT 30 +#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */ +#define EPC_EGS_SHIFT 29 +#define EPC_ELPID 0x00ff0000 +#define EPC_ELPID_SHIFT 16 +#define EPC_EPID 0x00003fff +#define EPC_EPID_SHIFT 0 /* * The IBM-403 is an even more odd special case, as it is much diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 34b8afe94a50..bbede5882c5b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -116,6 +116,9 @@ int main(void) 
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); +#endif DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); @@ -387,6 +390,7 @@ int main(void) #ifdef CONFIG_KVM DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); @@ -429,9 +433,11 @@ int main(void) DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + /* book3s */ #ifdef CONFIG_KVM_BOOK3S_64_HV - DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -446,7 +452,6 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); @@ -597,6 +602,12 @@ int main(void) DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); + DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); + DEFINE(VCPU_EPLC, offsetof(struct 
kvm_vcpu, arch.eplc)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 51fd0724e095..5f051eeb93a2 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -3,6 +3,7 @@ #include /* for STACK_FRAME_REGS_MARKER */ #include +#include /* * Macros used for common Book-e exception handling @@ -36,8 +37,9 @@ stw r11, THREAD_NORMSAVE(0)(r10); \ stw r13, THREAD_NORMSAVE(2)(r10); \ mfcr r13; /* save CR in r13 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ + mfspr r11, SPRN_SRR1; \ + DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ + andi. r11, r11, MSR_PR; /* check whether user or kernel */\ mr r11, r1; \ beq 1f; \ /* if from user, start at top of this thread's kernel stack */ \ @@ -123,8 +125,9 @@ stw r10,GPR10(r8); \ stw r11,GPR11(r8); \ stw r9,_CCR(r8); /* save CR on stack */\ - mfspr r10,exc_level_srr1; /* check whether user or kernel */\ - andi. r10,r10,MSR_PR; \ + mfspr r11,exc_level_srr1; /* check whether user or kernel */\ + DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ + andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -172,6 +175,23 @@ EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ SPRN_MCSRR0, SPRN_MCSRR1) +/* + * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite + * being delivered to the host. This exception can only happen + * inside a KVM guest -- so we just handle up to the DO_KVM rather + * than try to fit this into one of the existing prolog macros. 
+ */ +#define GUEST_DOORBELL_EXCEPTION \ + START_EXCEPTION(GuestDoorbell); \ + mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ + mfspr r10, SPRN_SPRG_THREAD; \ + stw r11, THREAD_NORMSAVE(0)(r10); \ + mfspr r11, SPRN_SRR1; \ + stw r13, THREAD_NORMSAVE(2)(r10); \ + mfcr r13; /* save CR in r13 for now */\ + DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \ + trap + /* * Exception vectors. */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f64709ae331..2c33cd336434 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV select KVM_BOOK3S_PR +config KVM_BOOKE_HV + bool + config KVM_440 bool "KVM support for PowerPC 440 processors" depends on EXPERIMENTAL && 44x diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2ee9bae38328..75dbaeb2efa3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -17,6 +17,8 @@ * * Authors: Hollis Blanchard * Christian Ehrhardt + * Scott Wood + * Varun Sethi */ #include @@ -30,9 +32,12 @@ #include #include #include -#include "timing.h" #include +#include +#include +#include +#include "timing.h" #include "booke.h" unsigned long kvmppc_booke_handlers; @@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "doorbell", VCPU_STAT(dbell_exits) }, + { "guest doorbell", VCPU_STAT(gdbell_exits) }, { NULL } }; @@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { u32 old_msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOKE_HV + new_msr |= MSR_GS; +#endif + vcpu->arch.shared->msr = new_msr; kvmppc_mmu_msr_notify(vcpu, old_msr); @@ -195,6 +206,75 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void set_guest_srr(struct kvm_vcpu 
*vcpu, unsigned long srr0, u32 srr1) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GSRR0, srr0); + mtspr(SPRN_GSRR1, srr1); +#else + vcpu->arch.shared->srr0 = srr0; + vcpu->arch.shared->srr1 = srr1; +#endif +} + +static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.csrr0 = srr0; + vcpu->arch.csrr1 = srr1; +} + +static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) { + vcpu->arch.dsrr0 = srr0; + vcpu->arch.dsrr1 = srr1; + } else { + set_guest_csrr(vcpu, srr0, srr1); + } +} + +static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.mcsrr0 = srr0; + vcpu->arch.mcsrr1 = srr1; +} + +static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GDEAR); +#else + return vcpu->arch.shared->dar; +#endif +} + +static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GDEAR, dear); +#else + vcpu->arch.shared->dar = dear; +#endif +} + +static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GESR); +#else + return vcpu->arch.shared->esr; +#endif +} + +static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GESR, esr); +#else + vcpu->arch.shared->esr = esr; +#endif +} + /* Deliver the interrupt of the corresponding priority, if possible. 
*/ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -206,6 +286,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; bool keep_irq = false; + enum int_class int_class; /* Truncate crit indicators in 32 bit mode */ if (!(vcpu->arch.shared->msr & MSR_SF)) { @@ -241,16 +322,20 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_CRITICAL: - case BOOKE_IRQPRIO_WATCHDOG: allowed = vcpu->arch.shared->msr & MSR_CE; - msr_mask = MSR_ME; + allowed = allowed && !crit; + msr_mask = MSR_GS | MSR_ME; + int_class = INT_CLASS_CRIT; break; case BOOKE_IRQPRIO_MACHINE_CHECK: allowed = vcpu->arch.shared->msr & MSR_ME; - msr_mask = 0; + allowed = allowed && !crit; + msr_mask = MSR_GS; + int_class = INT_CLASS_MC; break; case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: @@ -259,28 +344,62 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_EXTERNAL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_DEBUG: allowed = vcpu->arch.shared->msr & MSR_DE; - msr_mask = MSR_ME; + allowed = allowed && !crit; + msr_mask = MSR_GS | MSR_ME; + int_class = INT_CLASS_CRIT; break; } if (allowed) { - vcpu->arch.shared->srr0 = vcpu->arch.pc; - vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; + switch (int_class) { + case INT_CLASS_NONCRIT: + set_guest_srr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_CRIT: + set_guest_csrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_DBG: + set_guest_dsrr(vcpu, vcpu->arch.pc, + 
vcpu->arch.shared->msr); + break; + case INT_CLASS_MC: + set_guest_mcsrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + } + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - vcpu->arch.shared->esr = vcpu->arch.queued_esr; + set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - vcpu->arch.shared->dar = vcpu->arch.queued_dear; + set_guest_dear(vcpu, vcpu->arch.queued_dear); kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); if (!keep_irq) clear_bit(priority, &vcpu->arch.pending_exceptions); } +#ifdef CONFIG_KVM_BOOKE_HV + /* + * If an interrupt is pending but masked, raise a guest doorbell + * so that we are notified when the guest enables the relevant + * MSR bit. + */ + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC); +#endif + return allowed; } @@ -344,6 +463,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINVAL; } + if (!current->thread.kvm_vcpu) { + WARN(1, "no vcpu\n"); + return -EPERM; + } + local_irq_disable(); kvmppc_core_prepare_to_enter(vcpu); @@ -363,6 +487,38 @@ out: return ret; } +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + return RESUME_GUEST_NV; + + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + return RESUME_HOST; + + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. 
*/ + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -371,12 +527,30 @@ out: int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { - enum emulation_result er; int r = RESUME_HOST; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); + switch (exit_nr) { + case BOOKE_INTERRUPT_EXTERNAL: + do_IRQ(current->thread.regs); + break; + + case BOOKE_INTERRUPT_DECREMENTER: + timer_interrupt(current->thread.regs); + break; + +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) + case BOOKE_INTERRUPT_DOORBELL: + doorbell_exception(current->thread.regs); + break; +#endif + case BOOKE_INTERRUPT_MACHINE_CHECK: + /* FIXME */ + break; + } + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -384,30 +558,56 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: - printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); - kvmppc_dump_vcpu(vcpu); - r = RESUME_HOST; + kvm_resched(vcpu); + r = RESUME_GUEST; break; case BOOKE_INTERRUPT_EXTERNAL: kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - if (need_resched()) - cond_resched(); + kvm_resched(vcpu); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DECREMENTER: - /* Since we switched IVPR back to the host's value, the host - * handled this interrupt the moment we enabled interrupts. - * Now we just offer it a chance to reschedule the guest. 
*/ kvmppc_account_exit(vcpu, DEC_EXITS); - if (need_resched()) - cond_resched(); + kvm_resched(vcpu); r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_DOORBELL: + kvmppc_account_exit(vcpu, DBELL_EXITS); + kvm_resched(vcpu); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL_CRIT: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_CE or MSR_ME was not + * set. Once we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_EE was not set. Once + * we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_HV_PRIV: + r = emulation_exit(run, vcpu); + break; + case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.shared->msr & MSR_PR) { + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* Program traps generated by user-level software must be handled * by the guest kernel. */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); @@ -416,32 +616,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - er = kvmppc_emulate_instruction(run, vcpu); - switch (er) { - case EMULATE_DONE: - /* don't overwrite subtypes, just account kvm_stats */ - kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); - /* Future optimization: only reload non-volatiles if - * they were actually modified by emulation. */ - r = RESUME_GUEST_NV; - break; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - r = RESUME_HOST; - break; - case EMULATE_FAIL: - /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); - /* For debugging, encode the failing instruction and - * report it to userspace. 
*/ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= vcpu->arch.last_inst; - r = RESUME_HOST; - break; - default: - BUG(); - } + r = emulation_exit(run, vcpu); break; case BOOKE_INTERRUPT_FP_UNAVAIL: @@ -506,6 +681,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; +#ifdef CONFIG_KVM_BOOKE_HV + case BOOKE_INTERRUPT_HV_SYSCALL: + if (!(vcpu->arch.shared->msr & MSR_PR)) { + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + } else { + /* + * hcall from guest userspace -- send privileged + * instruction program check. + */ + kvmppc_core_queue_program(vcpu, ESR_PPR); + } + + r = RESUME_GUEST; + break; +#else case BOOKE_INTERRUPT_SYSCALL: if (!(vcpu->arch.shared->msr & MSR_PR) && (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { @@ -519,6 +709,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; +#endif case BOOKE_INTERRUPT_DTLB_MISS: { unsigned long eaddr = vcpu->arch.fault_dear; @@ -659,12 +850,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int r; vcpu->arch.pc = 0; - vcpu->arch.shared->msr = 0; - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + kvmppc_set_msr(vcpu, 0); +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shadow_pid = 1; + vcpu->arch.shared->msr = 0; +#endif /* Eye-catching numbers so we know if the guest takes an interrupt * before it's programmed its own IVPR/IVORs. 
*/ @@ -745,8 +939,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = vcpu->arch.shared->esr; - sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.dear = get_guest_dear(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -763,8 +957,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - vcpu->arch.shared->esr = sregs->u.e.esr; - vcpu->arch.shared->dar = sregs->u.e.dear; + set_guest_esr(vcpu, sregs->u.e.esr); + set_guest_dear(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); @@ -961,14 +1155,17 @@ void kvmppc_decrementer_func(unsigned long data) void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + current->thread.kvm_vcpu = vcpu; } void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { + current->thread.kvm_vcpu = NULL; } int __init kvmppc_booke_init(void) { +#ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; unsigned long max_ivor = 0; int i; @@ -1011,7 +1208,7 @@ int __init kvmppc_booke_init(void) } flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - +#endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 05d1d99428ce..d53bcf2558f5 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -48,7 +48,20 @@ #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 /* Internal pseudo-irqprio for level triggered externals */ #define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 -#define BOOKE_IRQPRIO_MAX 20 +#define BOOKE_IRQPRIO_DBELL 21 +#define BOOKE_IRQPRIO_DBELL_CRIT 22 +#define BOOKE_IRQPRIO_MAX 23 + +#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \ + (1 << 
BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \ + (1 << BOOKE_IRQPRIO_DBELL) | \ + (1 << BOOKE_IRQPRIO_DECREMENTER) | \ + (1 << BOOKE_IRQPRIO_FIT) | \ + (1 << BOOKE_IRQPRIO_EXTERNAL)) + +#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \ + (1 << BOOKE_IRQPRIO_WATCHDOG) | \ + (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; @@ -74,4 +87,13 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu); +enum int_class { + INT_CLASS_NONCRIT, + INT_CLASS_CRIT, + INT_CLASS_MC, + INT_CLASS_DBG, +}; + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 3e652da36534..904412bbea40 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -99,6 +99,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } +/* + * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode). + * Their backing store is in real registers, and these functions + * will return the wrong result if called for them in another context + * (such as debugging). + */ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) { int emulated = EMULATE_DONE; @@ -122,9 +128,11 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) kvmppc_set_tcr(vcpu, spr_val); break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ + /* + * Note: SPRG4-7 are user-readable. + * These values are loaded into the real SPRGs when resuming the + * guest (PR-mode only). 
+ */ case SPRN_SPRG4: vcpu->arch.shared->sprg4 = spr_val; break; case SPRN_SPRG5: @@ -136,6 +144,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IVPR: vcpu->arch.ivpr = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVPR, spr_val); +#endif break; case SPRN_IVOR0: vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; @@ -145,6 +156,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR2: vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR2, spr_val); +#endif break; case SPRN_IVOR3: vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; @@ -163,6 +177,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR8: vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR8, spr_val); +#endif break; case SPRN_IVOR9: vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S new file mode 100644 index 000000000000..9eaeebd86e44 --- /dev/null +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -0,0 +1,587 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. 
+ * + * Author: Varun Sethi + * Author: Scott Wood + * + * This file is derived from arch/powerpc/kvm/booke_interrupts.S + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ + +#define GET_VCPU(vcpu, thread) \ + PPC_LL vcpu, THREAD_KVM_VCPU(thread) + +#define SET_VCPU(vcpu) \ + PPC_STL vcpu, (THREAD + THREAD_KVM_VCPU)(r2) + +#define LONGBYTES (BITS_PER_LONG / 8) + +#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES)) +#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) + +/* The host stack layout: */ +#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ +#define HOST_CALLEE_LR (1 * LONGBYTES) +#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ +/* + * r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. + */ +#define HOST_R2 (3 * LONGBYTES) +#define HOST_NV_GPRS (4 * LONGBYTES) +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES) +#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ + +#define NEED_EMU 0x00000001 /* emulation -- save nv regs */ +#define NEED_DEAR 0x00000002 /* save faulting DEAR */ +#define NEED_ESR 0x00000004 /* save faulting ESR */ + +/* + * On entry: + * r4 = vcpu, r5 = srr0, r6 = srr1 + * saved in vcpu: cr, ctr, r3-r13 + */ +.macro kvm_handler_common intno, srr0, flags + mfspr r10, SPRN_PID + lwz r8, VCPU_HOST_PID(r4) + PPC_LL r11, VCPU_SHARED(r4) + PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. 
*/ + li r14, \intno + + stw r10, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r8 + + .if \flags & NEED_EMU + lwz r9, VCPU_KVM(r4) + .endif + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + PPC_STL r8, VCPU_TIMING_EXIT_TBL(r4) + bne- 1b + PPC_STL r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + + oris r8, r6, MSR_CE@h +#ifndef CONFIG_64BIT + stw r6, (VCPU_SHARED_MSR + 4)(r11) +#else + std r6, (VCPU_SHARED_MSR)(r11) +#endif + ori r8, r8, MSR_ME | MSR_RI + PPC_STL r5, VCPU_PC(r4) + + /* + * Make sure CE/ME/RI are set (if appropriate for exception type) + * whether or not the guest had it set. Since mfmsr/mtmsr are + * somewhat expensive, skip in the common case where the guest + * had all these bits set (and thus they're still set if + * appropriate for the exception type). + */ + cmpw r6, r8 + .if \flags & NEED_EMU + lwz r9, KVM_LPID(r9) + .endif + beq 1f + mfmsr r7 + .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0 + oris r7, r7, MSR_CE@h + .endif + .if \srr0 != SPRN_MCSRR0 + ori r7, r7, MSR_ME | MSR_RI + .endif + mtmsr r7 +1: + + .if \flags & NEED_EMU + /* + * This assumes you have external PID support. + * To support a bookehv CPU without external PID, you'll + * need to look up the TLB entry and create a temporary mapping. + * + * FIXME: we don't currently handle if the lwepx faults. PR-mode + * booke doesn't handle it either. Since Linux doesn't use + * broadcast tlbivax anymore, the only way this should happen is + * if the guest maps its memory execute-but-not-read, or if we + * somehow take a TLB miss in the middle of this entry code and + * evict the relevant entry. On e500mc, all kernel lowmem is + * bolted into TLB1 large page mappings, and we don't use + * broadcast invalidates, so we should not take a TLB miss here. + * + * Later we'll need to deal with faults here. 
Disallowing guest + * mappings that are execute-but-not-read could be an option on + * e500mc, but not on chips with an LRAT if it is used. + */ + + mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + mr r8, r3 + PPC_STL r20, VCPU_GPR(r20)(r4) + rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS + PPC_STL r21, VCPU_GPR(r21)(r4) + rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR + PPC_STL r22, VCPU_GPR(r22)(r4) + rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + mtspr SPRN_EPLC, r8 + isync + lwepx r9, 0, r5 + mtspr SPRN_EPLC, r3 + stw r9, VCPU_LAST_INST(r4) + .endif + + .if \flags & NEED_ESR + mfspr r8, SPRN_ESR + PPC_STL r8, VCPU_FAULT_ESR(r4) + .endif + + .if \flags & NEED_DEAR + mfspr r9, SPRN_DEAR + PPC_STL r9, VCPU_FAULT_DEAR(r4) + .endif + + b kvmppc_resume_host +.endm + +/* + * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h + */ +.macro kvm_handler intno srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, SPRN_SPRG_RSCRATCH0 + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, THREAD_NORMSAVE(0)(r10) + PPC_STL r5, VCPU_GPR(r5)(r11) + PPC_STL r13, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r10)(r11) + PPC_LL r3, THREAD_NORMSAVE(2)(r10) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + mfspr r6, \srr1 + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r8, VCPU_GPR(r8)(r11) + PPC_STL r9, VCPU_GPR(r9)(r11) + PPC_STL r3, VCPU_GPR(r13)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL 
r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +.macro kvm_lvl_handler intno scratch srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + mfspr r10, SPRN_SPRG_THREAD + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, \scratch + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, GPR9(r8) + PPC_STL r5, VCPU_GPR(r5)(r11) + PPC_STL r9, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r8)(r11) + PPC_LL r3, GPR10(r8) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r9)(r11) + mfspr r6, \srr1 + PPC_LL r4, GPR11(r8) + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r8, VCPU_GPR(r8)(r11) + PPC_STL r3, VCPU_GPR(r10)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + PPC_STL r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \ + SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR) +kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ + SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, 
SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU +kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 + + +/* Registers: + * SPRG_SCRATCH0: guest r10 + * r4: vcpu pointer + * r11: vcpu->arch.shared + * r14: KVM exit number + */ +_GLOBAL(kvmppc_resume_host) + /* Save remaining volatile guest register state to vcpu. 
*/ + mfspr r3, SPRN_VRSAVE + PPC_STL r0, VCPU_GPR(r0)(r4) + PPC_STL r1, VCPU_GPR(r1)(r4) + mflr r5 + mfspr r6, SPRN_SPRG4 + PPC_STL r2, VCPU_GPR(r2)(r4) + PPC_STL r5, VCPU_LR(r4) + mfspr r7, SPRN_SPRG5 + PPC_STL r3, VCPU_VRSAVE(r4) + PPC_STL r6, VCPU_SHARED_SPRG4(r11) + mfspr r8, SPRN_SPRG6 + PPC_STL r7, VCPU_SHARED_SPRG5(r11) + mfspr r9, SPRN_SPRG7 + PPC_STL r8, VCPU_SHARED_SPRG6(r11) + mfxer r3 + PPC_STL r9, VCPU_SHARED_SPRG7(r11) + + /* save guest MAS registers and restore host mas4 & mas6 */ + mfspr r5, SPRN_MAS0 + PPC_STL r3, VCPU_XER(r4) + mfspr r6, SPRN_MAS1 + stw r5, VCPU_SHARED_MAS0(r11) + mfspr r7, SPRN_MAS2 + stw r6, VCPU_SHARED_MAS1(r11) +#ifndef CONFIG_64BIT + stw r7, (VCPU_SHARED_MAS2 + 4)(r11) +#else + std r7, (VCPU_SHARED_MAS2)(r11) +#endif + mfspr r5, SPRN_MAS3 + mfspr r6, SPRN_MAS4 + stw r5, VCPU_SHARED_MAS7_3+4(r11) + mfspr r7, SPRN_MAS6 + stw r6, VCPU_SHARED_MAS4(r11) + mfspr r5, SPRN_MAS7 + lwz r6, VCPU_HOST_MAS4(r4) + stw r7, VCPU_SHARED_MAS6(r11) + lwz r8, VCPU_HOST_MAS6(r4) + mtspr SPRN_MAS4, r6 + stw r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r8 + mfspr r3, SPRN_EPCR + rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH + mtspr SPRN_EPCR, r3 + isync + + /* Restore host stack pointer */ + PPC_LL r1, VCPU_HOST_STACK(r4) + PPC_LL r2, HOST_R2(r1) + + /* Switch to kernel stack and jump to handler. */ + PPC_LL r3, HOST_RUN(r1) + mr r5, r14 /* intno */ + mr r14, r4 /* Save vcpu pointer. */ + bl kvmppc_handle_exit + + /* Restore vcpu pointer and the nonvolatiles we used. */ + mr r4, r14 + PPC_LL r14, VCPU_GPR(r14)(r4) + + andi. 
r5, r3, RESUME_FLAG_NV + beq skip_nv_load + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) +skip_nv_load: + /* Should we return to the guest? */ + andi. r5, r3, RESUME_FLAG_HOST + beq lightweight_exit + + srawi r3, r3, 2 /* Shift -ERR back down. */ + +heavyweight_exit: + /* Not returning to guest. */ + PPC_LL r5, HOST_STACK_LR(r1) + + /* + * We already saved guest volatile register state; now save the + * non-volatiles. + */ + + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + PPC_STL r20, VCPU_GPR(r20)(r4) + PPC_STL r21, VCPU_GPR(r21)(r4) + PPC_STL r22, VCPU_GPR(r22)(r4) + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + + /* Load host non-volatile register state from host stack. 
*/ + PPC_LL r14, HOST_NV_GPR(r14)(r1) + PPC_LL r15, HOST_NV_GPR(r15)(r1) + PPC_LL r16, HOST_NV_GPR(r16)(r1) + PPC_LL r17, HOST_NV_GPR(r17)(r1) + PPC_LL r18, HOST_NV_GPR(r18)(r1) + PPC_LL r19, HOST_NV_GPR(r19)(r1) + PPC_LL r20, HOST_NV_GPR(r20)(r1) + PPC_LL r21, HOST_NV_GPR(r21)(r1) + PPC_LL r22, HOST_NV_GPR(r22)(r1) + PPC_LL r23, HOST_NV_GPR(r23)(r1) + PPC_LL r24, HOST_NV_GPR(r24)(r1) + PPC_LL r25, HOST_NV_GPR(r25)(r1) + PPC_LL r26, HOST_NV_GPR(r26)(r1) + PPC_LL r27, HOST_NV_GPR(r27)(r1) + PPC_LL r28, HOST_NV_GPR(r28)(r1) + PPC_LL r29, HOST_NV_GPR(r29)(r1) + PPC_LL r30, HOST_NV_GPR(r30)(r1) + PPC_LL r31, HOST_NV_GPR(r31)(r1) + + /* Return to kvm_vcpu_run(). */ + mtlr r5 + addi r1, r1, HOST_STACK_SIZE + /* r3 still contains the return code from kvmppc_handle_exit(). */ + blr + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + stwu r1, -HOST_STACK_SIZE(r1) + PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + + /* Save host state to stack. */ + PPC_STL r3, HOST_RUN(r1) + mflr r3 + PPC_STL r3, HOST_STACK_LR(r1) + + /* Save host non-volatile register state to stack. */ + PPC_STL r14, HOST_NV_GPR(r14)(r1) + PPC_STL r15, HOST_NV_GPR(r15)(r1) + PPC_STL r16, HOST_NV_GPR(r16)(r1) + PPC_STL r17, HOST_NV_GPR(r17)(r1) + PPC_STL r18, HOST_NV_GPR(r18)(r1) + PPC_STL r19, HOST_NV_GPR(r19)(r1) + PPC_STL r20, HOST_NV_GPR(r20)(r1) + PPC_STL r21, HOST_NV_GPR(r21)(r1) + PPC_STL r22, HOST_NV_GPR(r22)(r1) + PPC_STL r23, HOST_NV_GPR(r23)(r1) + PPC_STL r24, HOST_NV_GPR(r24)(r1) + PPC_STL r25, HOST_NV_GPR(r25)(r1) + PPC_STL r26, HOST_NV_GPR(r26)(r1) + PPC_STL r27, HOST_NV_GPR(r27)(r1) + PPC_STL r28, HOST_NV_GPR(r28)(r1) + PPC_STL r29, HOST_NV_GPR(r29)(r1) + PPC_STL r30, HOST_NV_GPR(r30)(r1) + PPC_STL r31, HOST_NV_GPR(r31)(r1) + + /* Load guest non-volatiles. 
*/ + PPC_LL r14, VCPU_GPR(r14)(r4) + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) + + +lightweight_exit: + PPC_STL r2, HOST_R2(r1) + + mfspr r3, SPRN_PID + stw r3, VCPU_HOST_PID(r4) + lwz r3, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r3 + + /* Save vcpu pointer for the exception handlers + * must be done before loading guest r2. + */ +// SET_VCPU(r4) + + PPC_LL r11, VCPU_SHARED(r4) + /* Save host mas4 and mas6 and load guest MAS registers */ + mfspr r3, SPRN_MAS4 + stw r3, VCPU_HOST_MAS4(r4) + mfspr r3, SPRN_MAS6 + stw r3, VCPU_HOST_MAS6(r4) + lwz r3, VCPU_SHARED_MAS0(r11) + lwz r5, VCPU_SHARED_MAS1(r11) +#ifndef CONFIG_64BIT + lwz r6, (VCPU_SHARED_MAS2 + 4)(r11) +#else + ld r6, (VCPU_SHARED_MAS2)(r11) +#endif + lwz r7, VCPU_SHARED_MAS7_3+4(r11) + lwz r8, VCPU_SHARED_MAS4(r11) + mtspr SPRN_MAS0, r3 + mtspr SPRN_MAS1, r5 + mtspr SPRN_MAS2, r6 + mtspr SPRN_MAS3, r7 + mtspr SPRN_MAS4, r8 + lwz r3, VCPU_SHARED_MAS6(r11) + lwz r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r3 + mtspr SPRN_MAS7, r5 + /* Disable MAS register updates via exception */ + mfspr r3, SPRN_EPCR + oris r3, r3, SPRN_EPCR_DMIUH@h + mtspr SPRN_EPCR, r3 + + /* + * Host interrupt handlers may have clobbered these guest-readable + * SPRGs, so we need to reload them here with the guest's values. 
+ */ + lwz r3, VCPU_VRSAVE(r4) + lwz r5, VCPU_SHARED_SPRG4(r11) + mtspr SPRN_VRSAVE, r3 + lwz r6, VCPU_SHARED_SPRG5(r11) + mtspr SPRN_SPRG4W, r5 + lwz r7, VCPU_SHARED_SPRG6(r11) + mtspr SPRN_SPRG5W, r6 + lwz r8, VCPU_SHARED_SPRG7(r11) + mtspr SPRN_SPRG6W, r7 + mtspr SPRN_SPRG7W, r8 + + /* Load some guest volatiles. */ + PPC_LL r3, VCPU_LR(r4) + PPC_LL r5, VCPU_XER(r4) + PPC_LL r6, VCPU_CTR(r4) + PPC_LL r7, VCPU_CR(r4) + PPC_LL r8, VCPU_PC(r4) +#ifndef CONFIG_64BIT + lwz r9, (VCPU_SHARED_MSR + 4)(r11) +#else + ld r9, (VCPU_SHARED_MSR)(r11) +#endif + PPC_LL r0, VCPU_GPR(r0)(r4) + PPC_LL r1, VCPU_GPR(r1)(r4) + PPC_LL r2, VCPU_GPR(r2)(r4) + PPC_LL r10, VCPU_GPR(r10)(r4) + PPC_LL r11, VCPU_GPR(r11)(r4) + PPC_LL r12, VCPU_GPR(r12)(r4) + PPC_LL r13, VCPU_GPR(r13)(r4) + mtlr r3 + mtxer r5 + mtctr r6 + mtcr r7 + mtsrr0 r8 + mtsrr1 r9 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + PPC_STL r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + bne 1b + PPC_STL r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + + /* Finish loading guest volatiles and jump to guest. 
*/ + PPC_LL r5, VCPU_GPR(r5)(r4) + PPC_LL r6, VCPU_GPR(r6)(r4) + PPC_LL r7, VCPU_GPR(r7)(r4) + PPC_LL r8, VCPU_GPR(r8)(r4) + PPC_LL r9, VCPU_GPR(r9)(r4) + + PPC_LL r3, VCPU_GPR(r3)(r4) + PPC_LL r4, VCPU_GPR(r4)(r4) + rfi diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cd53e08403b3..6a530e4b3e7c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -114,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) goto out; #endif +#ifdef CONFIG_KVM_BOOKE_HV + if (!cpu_has_feature(CPU_FTR_EMB_HV)) + goto out; +#endif + r = true; out: diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 8167d42a776f..bf191e72b2d8 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case SIGNAL_EXITS: vcpu->stat.signal_exits++; break; + case DBELL_EXITS: + vcpu->stat.dbell_exits++; + break; + case GDBELL_EXITS: + vcpu->stat.gdbell_exits++; + break; } } -- cgit v1.2.3 From 73196cd364a2d972d73fa08da9d81ca3215bed68 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:47 +0000 Subject: KVM: PPC: e500mc support Add processor support for e500mc, using hardware virtualization support (GS-mode). Current issues include: - No support for external proxy (coreint) interrupt mode in the guest. Includes work by Ashish Kalra , Varun Sethi , and Liu Yu . 
Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/cputable.h | 6 +- arch/powerpc/include/asm/kvm.h | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 1 + arch/powerpc/kernel/head_fsl_booke.S | 46 ++++ arch/powerpc/kvm/Kconfig | 17 +- arch/powerpc/kvm/Makefile | 11 + arch/powerpc/kvm/e500.h | 13 +- arch/powerpc/kvm/e500_emulate.c | 24 ++- arch/powerpc/kvm/e500_tlb.c | 21 +- arch/powerpc/kvm/e500mc.c | 342 ++++++++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 6 +- 11 files changed, 476 insertions(+), 12 deletions(-) create mode 100644 arch/powerpc/kvm/e500mc.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 7108a9c490a3..67c34afc6b5d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -168,6 +168,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000) #define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000) #define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000) +#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000) /* * Add the 64-bit processor unique features in the top half of the word; @@ -386,11 +387,11 @@ extern const char *powerpc_base_platform; CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ @@ -539,6 +540,7 @@ 
enum { #ifdef CONFIG_PPC_E500MC CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 & #endif + ~CPU_FTR_EMB_HV & /* can be removed at runtime */ CPU_FTRS_POSSIBLE, }; #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index b921c3f48928..1bea4d8ea6f4 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -277,6 +277,7 @@ struct kvm_sync_regs { #define KVM_CPU_E500V2 2 #define KVM_CPU_3S_32 3 #define KVM_CPU_3S_64 4 +#define KVM_CPU_E500MC 5 /* for KVM_CAP_SPAPR_TCE */ struct kvm_create_spapr_tce { diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 8053db02b85e..69fdd2322a66 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2) mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) + mr r5, r4 mflr r4 bl __e500_icache_setup bl __e500_dcache_setup diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 7c406dd9fea6..89c6d6f36785 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -380,10 +380,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -468,10 +474,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) 
+#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -580,6 +592,17 @@ interrupt_base: DEBUG_DEBUG_EXCEPTION DEBUG_CRIT_EXCEPTION + GUEST_DOORBELL_EXCEPTION + + CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \ + unknown_exception) + + /* Hypercall */ + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + + /* Embedded Hypervisor Privilege */ + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + /* * Local functions */ @@ -883,8 +906,31 @@ _GLOBAL(__setup_e500mc_ivors) mtspr SPRN_IVOR36,r3 li r3,CriticalDoorbell@l mtspr SPRN_IVOR37,r3 + + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r3, SPRN_MMUCFG + andis. r3, r3, MMUCFG_LPIDSIZE@h + beq no_hv + li r3,GuestDoorbell@l + mtspr SPRN_IVOR38,r3 + li r3,CriticalGuestDoorbell@l + mtspr SPRN_IVOR39,r3 + li r3,Hypercall@l + mtspr SPRN_IVOR40,r3 + li r3,Ehvpriv@l + mtspr SPRN_IVOR41,r3 +skip_hv_ivors: sync blr +no_hv: + lwz r3, CPU_SPEC_FEATURES(r5) + rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV + stw r3, CPU_SPEC_FEATURES(r5) + b skip_hv_ivors /* * extern void giveup_altivec(struct task_struct *prev) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 2c33cd336434..58f6e6817924 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -109,7 +109,7 @@ config KVM_440 config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500 + depends on KVM_440 || KVM_E500 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -132,6 +132,21 @@ config KVM_E500 If unsure, say N. 
+config KVM_E500MC + bool "KVM support for PowerPC E500MC/E5500 processors" + depends on EXPERIMENTAL && PPC_E500MC + select KVM + select KVM_MMIO + select KVM_BOOKE_HV + ---help--- + Support running unmodified E500MC/E5500 (32-bit) guest kernels in + virtual machines on E500MC/E5500 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3688aeecc4b2..62febd730ba4 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -38,6 +38,16 @@ kvm-e500-objs := \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) +kvm-e500mc-objs := \ + $(common-objs-y) \ + booke.o \ + booke_emulate.o \ + bookehv_interrupts.o \ + e500mc.o \ + e500_tlb.o \ + e500_emulate.o +kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) + kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ ../../../virt/kvm/coalesced_mmio.o \ fpu.o \ @@ -89,6 +99,7 @@ kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500) += kvm.o +obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index ce3f163f702e..31430852ba74 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -2,7 +2,9 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 
* * Author: Yu Liu + * Scott Wood * Ashish Kalra + * Varun Sethi * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.h and @@ -100,6 +102,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); } + /* This geometry is the legacy default -- can be overridden by userspace */ #define KVM_E500_TLB0_WAY_SIZE 128 #define KVM_E500_TLB0_WAY_NUM 2 @@ -250,10 +253,12 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, if (!get_tlb_v(tlbe)) return 0; +#ifndef CONFIG_KVM_BOOKE_HV /* Does it match current guest AS? */ /* XXX what about IS != DS? */ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; +#endif gpa = get_tlb_raddr(tlbe); if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) @@ -274,7 +279,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe); void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_BOOKE_HV +#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) +#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) +#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) +#else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); @@ -288,6 +297,6 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) /* Force TS=1 for all guest mappings. 
*/ #define get_tlb_sts(gtlbe) (MAS1_TS) -#endif /* CONFIG_KVM_E500 */ +#endif /* !BOOKE_HV */ #endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index af02c18fc798..98b6c1cd6b82 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -85,6 +85,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV case SPRN_PID: kvmppc_set_pid(vcpu, spr_val); break; @@ -114,6 +115,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; break; +#endif case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); @@ -143,7 +145,14 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; - +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val; + break; + case SPRN_IVOR37: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val; + break; +#endif default: emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } @@ -155,9 +164,11 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; - unsigned long val; switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV + unsigned long val; + case SPRN_PID: kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; case SPRN_PID1: @@ -182,6 +193,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) val = vcpu->arch.shared->mas7_3 >> 32; kvmppc_set_gpr(vcpu, rt, val); break; +#endif case SPRN_TLB0CFG: kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[0]); break; case SPRN_TLB1CFG: @@ -216,6 +228,14 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case 
SPRN_IVOR35: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); break; +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]); + break; + case SPRN_IVOR37: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]); + break; +#endif default: emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6eb5d655bdb4..e232bb464a80 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -2,7 +2,9 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, @@ -64,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) /* Mask off reserved bits. 
*/ mas3 &= MAS3_ATTRIB_MASK; +#ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, * so we need to translate guest @@ -71,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) mas3 &= ~E500_TLB_USER_PERM_MASK; mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; } - - return mas3 | E500_TLB_SUPER_PERM_MASK; + mas3 |= E500_TLB_SUPER_PERM_MASK; +#endif + return mas3; } static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) @@ -98,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_MAS8, stlbe->mas8); +#endif asm volatile("isync; tlbwe" : : : "memory"); + +#ifdef CONFIG_KVM_BOOKE_HV + /* Must clear mas8 for other host tlbwe's */ + mtspr(SPRN_MAS8, 0); + isync(); +#endif local_irq_restore(flags); trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, @@ -384,6 +397,10 @@ static inline void kvmppc_e500_setup_stlbe( e500_shadow_mas2_attrib(gtlbe->mas2, pr); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + +#ifdef CONFIG_KVM_BOOKE_HV + stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; +#endif } static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c new file mode 100644 index 000000000000..fe6c1de6b701 --- /dev/null +++ b/arch/powerpc/kvm/e500mc.c @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Varun Sethi, + * + * Description: + * This file is derived from arch/powerpc/kvm/e500.c, + * by Yu Liu . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "booke.h" +#include "e500.h" + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) +{ + enum ppc_dbell dbell_type; + unsigned long tag; + + switch (type) { + case INT_CLASS_NONCRIT: + dbell_type = PPC_G_DBELL; + break; + case INT_CLASS_CRIT: + dbell_type = PPC_G_DBELL_CRIT; + break; + case INT_CLASS_MC: + dbell_type = PPC_G_DBELL_MC; + break; + default: + WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type); + return; + } + + + tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + mb(); + ppc_msgsnd(dbell_type, 0, tag); +} + +/* gtlbe must not be mapped by more than one host tlb entry */ +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned int tid, ts; + u32 val, eaddr, lpid; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + lpid = vcpu_e500->vcpu.kvm->arch.lpid; + + /* We search the host TLB to invalidate its shadow TLB entry */ + val = (tid << 16) | ts; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + mtspr(SPRN_MAS5, MAS5_SGS | lpid); + + asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + mtspr(SPRN_MAS5, 0); + /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */ + mtspr(SPRN_MAS8, 0); + isync(); + + local_irq_restore(flags); +} + +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + asm volatile("tlbilxlpid"); + mtspr(SPRN_MAS5, 0); + local_irq_restore(flags); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + vcpu->arch.pid = pid; +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) 
+{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_booke_vcpu_load(vcpu, cpu); + + mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); + mtspr(SPRN_GPIR, vcpu->vcpu_id); + mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + mtspr(SPRN_EPLC, vcpu->arch.eplc); + mtspr(SPRN_EPSC, vcpu->arch.epsc); + + mtspr(SPRN_GIVPR, vcpu->arch.ivpr); + mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0); + mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1); + mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2); + mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3); + + mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0); + mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1); + + mtspr(SPRN_GEPR, vcpu->arch.epr); + mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); + mtspr(SPRN_GESR, vcpu->arch.shared->esr); + + if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + kvmppc_e500_tlbil_all(vcpu_e500); + + kvmppc_load_guest_fp(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + vcpu->arch.eplc = mfspr(SPRN_EPLC); + vcpu->arch.epsc = mfspr(SPRN_EPSC); + + vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0); + vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1); + vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2); + vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3); + + vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0); + vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1); + + vcpu->arch.epr = mfspr(SPRN_GEPR); + vcpu->arch.shared->dar = mfspr(SPRN_GDEAR); + vcpu->arch.shared->esr = mfspr(SPRN_GESR); + + vcpu->arch.oldpir = mfspr(SPRN_PIR); + + kvmppc_booke_vcpu_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0) + r = 0; + else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) + r = 0; + else + r = 
-ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \ + SPRN_EPCR_DUVD; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; + vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; + + vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); + + vcpu->arch.cpu_type = KVM_CPU_E500MC; + + return 0; +} + +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM | + KVM_SREGS_E_PC; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + kvmppc_get_sregs_e500_tlb(vcpu, sregs); + + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; + sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int ret; + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); + if (ret < 0) + return ret; + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PC) { + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = + 
sregs->u.e.ivor_high[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = + sregs->u.e.ivor_high[5]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + vcpu = &vcpu_e500->vcpu; + + /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */ + vcpu->arch.oldpir = 0xffffffff; + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + + return vcpu; + +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_vcpu: + kvm_vcpu_uninit(vcpu); + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + free_page((unsigned long)vcpu->arch.shared); + kvmppc_e500_tlb_uninit(vcpu_e500); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + int lpid; + + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + return lpid; + + kvm->arch.lpid = lpid; + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_lpid(kvm->arch.lpid); +} + +static int __init kvmppc_e500mc_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + kvmppc_init_lpid(64); + kvmppc_claim_lpid(0); /* host */ + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +} + +static void __exit kvmppc_e500mc_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500mc_init); +module_exit(kvmppc_e500mc_exit); diff --git a/arch/powerpc/kvm/powerpc.c
b/arch/powerpc/kvm/powerpc.c index 6a530e4b3e7c..14bf8d59bc2a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -235,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif r = 1; @@ -629,7 +629,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; @@ -706,7 +706,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; -- cgit v1.2.3 From f0888f70151c7f53de2b45ee20ff1905837943e8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:54:17 +0000 Subject: KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs Currently on POWER7, if we are running the guest on a core and we don't need all the hardware threads, we do nothing to ensure that the unused threads aren't executing in the kernel (other than checking that they are offline). We just assume they're napping and we don't do anything to stop them trying to enter the kernel while the guest is running. This means that a stray IPI can wake up the hardware thread and it will then try to enter the kernel, but since the core is in guest context, it will execute code from the guest in hypervisor mode once it turns the MMU on, which tends to lead to crashes or hangs in the host. 
This fixes the problem by adding two new one-byte flags in the kvmppc_host_state structure in the PACA which are used to interlock between the primary thread and the unused secondary threads when entering the guest. With these flags, the primary thread can ensure that the unused secondaries are not already in kernel mode (i.e. handling a stray IPI) and then indicate that they should not try to enter the kernel if they do get woken for any reason. Instead they will go into KVM code, find that there is no vcpu to run, acknowledge and clear the IPI and go back to nap mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s_asm.h | 8 +++ arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/exceptions-64s.S | 12 ++-- arch/powerpc/kernel/idle_power7.S | 7 +++ arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 92 ++++++++++++++++++------------- 6 files changed, 124 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 1f2f5b6156bd..88609b23b775 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -79,6 +79,9 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_64_HV + u8 hwthread_req; + u8 hwthread_state; + struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; @@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu { #endif /*__ASSEMBLY__ */ +/* Values for hwthread_state */ +#define KVM_HWTHREAD_IN_KERNEL 0 +#define KVM_HWTHREAD_IN_NAP 1 +#define KVM_HWTHREAD_IN_KVM 2 + #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bbede5882c5b..2abcf7d4b29c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -540,6 +540,8 @@ int main(void)
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); + HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); + HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); #ifdef CONFIG_KVM_BOOK3S_64_HV HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cb705fdbb458..8829b1095f7f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -63,11 +63,13 @@ BEGIN_FTR_SECTION GET_PACA(r13) #ifdef CONFIG_KVM_BOOK3S_64_HV - lbz r0,PACAPROCSTART(r13) - cmpwi r0,0x80 - bne 1f - li r0,1 - stb r0,PACAPROCSTART(r13) + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 1f b kvm_start_guest 1: #endif diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 0cdc9a392839..7140d838339e 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -16,6 +16,7 @@ #include #include #include +#include #undef DEBUG @@ -81,6 +82,12 @@ _GLOBAL(power7_idle) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* Tell KVM we're napping */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* Magic NAP mode enter sequence */ std r0,0(r1) ptesync diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 01294a5099dd..e87f6196d222 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -569,6 +569,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, list_del(&vcpu->arch.run_list); } +static int kvmppc_grab_hwthread(int cpu) +{ + struct paca_struct *tpaca; + long timeout = 1000; + + tpaca = &paca[cpu]; + + /* Ensure the thread won't go into the kernel if it wakes */ + tpaca->kvm_hstate.hwthread_req = 1; + + /* + * If the 
thread is already executing in the kernel (e.g. handling + * a stray interrupt), wait for it to get back to nap mode. + * The smp_mb() is to ensure that our setting of hwthread_req + * is visible before we look at hwthread_state, so if this + * races with the code at system_reset_pSeries and the thread + * misses our setting of hwthread_req, we are sure to see its + * setting of hwthread_state, and vice versa. + */ + smp_mb(); + while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) { + if (--timeout <= 0) { + pr_err("KVM: couldn't grab cpu %d\n", cpu); + return -EBUSY; + } + udelay(1); + } + return 0; +} + +static void kvmppc_release_hwthread(int cpu) +{ + struct paca_struct *tpaca; + + tpaca = &paca[cpu]; + tpaca->kvm_hstate.hwthread_req = 0; + tpaca->kvm_hstate.kvm_vcpu = NULL; +} + static void kvmppc_start_thread(struct kvm_vcpu *vcpu) { int cpu; @@ -588,8 +627,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { - tpaca->cpu_start = 0x80; - wmb(); + kvmppc_grab_hwthread(cpu); xics_wake_cpu(cpu); ++vc->n_woken; } @@ -639,7 +677,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) struct kvm_vcpu *vcpu, *vcpu0, *vnext; long ret; u64 now; - int ptid; + int ptid, i; /* don't start if any threads have a signal pending */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) @@ -686,12 +724,17 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->napping_threads = 0; list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) kvmppc_start_thread(vcpu); + /* Grab any remaining hw threads so they can't go into the kernel */ + for (i = ptid; i < threads_per_core; ++i) + kvmppc_grab_hwthread(vc->pcpu + i); preempt_disable(); spin_unlock(&vc->lock); kvm_guest_enter(); __kvmppc_vcore_entry(NULL, vcpu0); + for (i = 0; i < threads_per_core; ++i) + kvmppc_release_hwthread(vc->pcpu + i); spin_lock(&vc->lock); /* disable sending of IPIs on 
virtual external irqs */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b70bf22a3ff3..d595033bd449 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -26,6 +26,7 @@ #include #include #include +#include /***************************************************************************** * * @@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) #define XICS_XIRR 4 #define XICS_QIRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ /* * We come in here when wakened from nap mode on a secondary hw thread. @@ -94,26 +96,54 @@ kvm_start_guest: subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + li r0,KVM_HWTHREAD_IN_KVM + stb r0,HSTATE_HWTHREAD_STATE(r13) - /* get vcpu pointer */ - ld r4, HSTATE_KVM_VCPU(r13) + /* NV GPR values from power7_idle() will no longer be valid */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) - /* We got here with an IPI; clear it */ - ld r5, HSTATE_XICS_PHYS(r13) - li r0, 0xff - li r6, XICS_QIRR - li r7, XICS_XIRR - lwzcix r8, r5, r7 /* ack the interrupt */ + /* get vcpu pointer, NULL if we have no vcpu to run */ + ld r4,HSTATE_KVM_VCPU(r13) + cmpdi cr1,r4,0 + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3,SPRN_SRR1 + rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ + cmpwi r3,4 /* was it an external interrupt? */ + bne 27f + + /* + * External interrupt - for now assume it is an IPI, since we + * should never get any other interrupts sent to offline threads. + * Only do this for secondary threads. + */ + beq cr1,25f + lwz r3,VCPU_PTID(r4) + cmpwi r3,0 + beq 27f +25: ld r5,HSTATE_XICS_PHYS(r13) + li r0,0xff + li r6,XICS_QIRR + li r7,XICS_XIRR + lwzcix r8,r5,r7 /* get and ack the interrupt */ sync - stbcix r0, r5, r6 /* clear it */ - stwcix r8, r5, r7 /* EOI it */ + clrldi. r9,r8,40 /* get interrupt source ID. 
*/ + beq 27f /* none there? */ + cmpwi r9,XICS_IPI + bne 26f + stbcix r0,r5,r6 /* clear IPI */ +26: stwcix r8,r5,r7 /* EOI the interrupt */ - /* NV GPR values from power7_idle() will no longer be valid */ - stb r0, PACA_NAPSTATELOST(r13) +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + + /* if we have no vcpu to run, go back to sleep */ + beq cr1,kvm_no_guest + + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede .global kvmppc_hv_entry kvmppc_hv_entry: @@ -1445,8 +1475,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * Take a nap until a decrementer or external interrupt occurs, * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR */ - li r0,0x80 - stb r0,PACAPROCSTART(r13) + li r0,1 + stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR,r5 @@ -1463,26 +1493,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) kvm_end_cede: /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - /* If we're a secondary thread and we got here by an IPI, ack it */ - ld r4,HSTATE_KVM_VCPU(r13) - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f - mfspr r3,SPRN_SRR1 - rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ - cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - ld r5, HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR - lwzcix r8,r5,r7 /* ack the interrupt */ - sync - stbcix r0,r5,r6 /* clear it */ - stwcix r8,r5,r7 /* EOI it */ -27: /* load up FP state */ bl kvmppc_load_fp @@ -1580,12 +1591,17 @@ secondary_nap: stwcx. 
r3, 0, r4 bne 51b +kvm_no_guest: + li r0, KVM_HWTHREAD_IN_NAP + stb r0, HSTATE_HWTHREAD_STATE(r13) + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 isync - li r0, 0 std r0, HSTATE_SCRATCH0(r13) ptesync ld r0, HSTATE_SCRATCH0(r13) -- cgit v1.2.3 From 2e25aa5f64b18a97f35266e51c71ff4dc644db0c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 19 Feb 2012 17:46:32 +0000 Subject: KVM: PPC: Book3S HV: Make virtual processor area registration more robust The PAPR API allows three sorts of per-virtual-processor areas to be registered (VPA, SLB shadow buffer, and dispatch trace log), and furthermore, these can be registered and unregistered for another virtual CPU. Currently we just update the vcpu fields pointing to these areas at the time of registration or unregistration. If this is done on another vcpu, there is the possibility that the target vcpu is using those fields at the time and could end up using a bogus pointer and corrupting memory. This fixes the race by making the target cpu itself do the update, so we can be sure that the update happens at a time when the fields aren't being used. Each area now has a struct kvmppc_vpa which is used to manage these updates. There is also a spinlock which protects access to all of the kvmppc_vpa structs, other than to the pinned_addr fields. (We could have just taken the spinlock when using the vpa, slb_shadow or dtl fields, but that would mean taking the spinlock on every guest entry and exit.) This also changes 'struct dtl' (which was undefined) to 'struct dtl_entry', which is what the rest of the kernel uses. Thanks to Michael Ellerman for pointing out the need to initialize vcpu->arch.vpa_update_lock. 
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/hvcall.h | 10 ++ arch/powerpc/include/asm/kvm_host.h | 27 ++++- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 227 +++++++++++++++++++++++++----------- 4 files changed, 190 insertions(+), 76 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 1c324ff55ea8..318bac9f8752 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -114,6 +114,16 @@ #define H_PP1 (1UL<<(63-62)) #define H_PP2 (1UL<<(63-63)) +/* Flags for H_REGISTER_VPA subfunction field */ +#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */ +#define H_VPA_FUNC_MASK 7UL +#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */ +#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */ +#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */ +#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */ +#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */ +#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */ + /* VASI States */ #define H_VASI_INVALID 0 #define H_VASI_ENABLED 1 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 97ecdaf82956..93ffd8dd8554 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -82,7 +82,7 @@ struct kvm_vcpu; struct lppaca; struct slb_shadow; -struct dtl; +struct dtl_entry; struct kvm_vm_stat { u32 remote_tlb_flush; @@ -279,6 +279,19 @@ struct kvmppc_vcore { #define VCORE_EXITING 2 #define VCORE_SLEEPING 3 +/* + * Struct used to manage memory for a virtual processor area + * registered by a PAPR guest. There are three types of area + * that a guest can register. 
+ */ +struct kvmppc_vpa { + void *pinned_addr; /* Address in kernel linear mapping */ + void *pinned_end; /* End of region */ + unsigned long next_gpa; /* Guest phys addr for update */ + unsigned long len; /* Number of bytes required */ + u8 update_pending; /* 1 => update pinned_addr from next_gpa */ +}; + struct kvmppc_pte { ulong eaddr; u64 vpage; @@ -473,11 +486,6 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - struct lppaca *vpa; - struct slb_shadow *slb_shadow; - struct dtl *dtl; - struct dtl *dtl_end; - wait_queue_head_t *wqp; struct kvmppc_vcore *vcore; int ret; @@ -502,6 +510,13 @@ struct kvm_vcpu_arch { struct task_struct *run_task; struct kvm_run *kvm_run; pgd_t *pgdir; + + spinlock_t vpa_update_lock; + struct kvmppc_vpa vpa; + struct kvmppc_vpa dtl; + struct dtl_entry *dtl_ptr; + unsigned long dtl_index; + struct kvmppc_vpa slb_shadow; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2abcf7d4b29c..502e038f8c8a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -466,7 +466,7 @@ int main(void) DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e87f6196d222..2444a9ce781f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -134,6 +134,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +/* Length for a per-processor buffer is passed in at offset 4 in the buffer */ +struct reg_vpa { + 
u32 dummy; + union { + u16 hword; + u32 word; + } length; +}; + +static int vpa_is_registered(struct kvmppc_vpa *vpap) +{ + if (vpap->update_pending) + return vpap->next_gpa != 0; + return vpap->pinned_addr != NULL; +} + static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long vcpuid, unsigned long vpa) @@ -142,88 +158,153 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long len, nb; void *va; struct kvm_vcpu *tvcpu; - int err = H_PARAMETER; + int err; + int subfunc; + struct kvmppc_vpa *vpap; tvcpu = kvmppc_find_vcpu(kvm, vcpuid); if (!tvcpu) return H_PARAMETER; - flags >>= 63 - 18; - flags &= 7; - if (flags == 0 || flags == 4) - return H_PARAMETER; - if (flags < 4) { - if (vpa & 0x7f) + subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK; + if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL || + subfunc == H_VPA_REG_SLB) { + /* Registering new area - address must be cache-line aligned */ + if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa) return H_PARAMETER; - if (flags >= 2 && !tvcpu->arch.vpa) - return H_RESOURCE; - /* registering new area; convert logical addr to real */ + + /* convert logical addr to kernel addr and read length */ va = kvmppc_pin_guest_page(kvm, vpa, &nb); if (va == NULL) return H_PARAMETER; - if (flags <= 1) - len = *(unsigned short *)(va + 4); + if (subfunc == H_VPA_REG_VPA) + len = ((struct reg_vpa *)va)->length.hword; else - len = *(unsigned int *)(va + 4); - if (len > nb) - goto out_unpin; - switch (flags) { - case 1: /* register VPA */ - if (len < 640) - goto out_unpin; - if (tvcpu->arch.vpa) - kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); - tvcpu->arch.vpa = va; - init_vpa(vcpu, va); - break; - case 2: /* register DTL */ - if (len < 48) - goto out_unpin; - len -= len % 48; - if (tvcpu->arch.dtl) - kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); - tvcpu->arch.dtl = va; - tvcpu->arch.dtl_end = va + len; + len = ((struct reg_vpa *)va)->length.word; + 
kvmppc_unpin_guest_page(kvm, va); + + /* Check length */ + if (len > nb || len < sizeof(struct reg_vpa)) + return H_PARAMETER; + } else { + vpa = 0; + len = 0; + } + + err = H_PARAMETER; + vpap = NULL; + spin_lock(&tvcpu->arch.vpa_update_lock); + + switch (subfunc) { + case H_VPA_REG_VPA: /* register VPA */ + if (len < sizeof(struct lppaca)) break; - case 3: /* register SLB shadow buffer */ - if (len < 16) - goto out_unpin; - if (tvcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = va; + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case H_VPA_REG_DTL: /* register DTL */ + if (len < sizeof(struct dtl_entry)) break; - } - } else { - switch (flags) { - case 5: /* unregister VPA */ - if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) - return H_RESOURCE; - if (!tvcpu->arch.vpa) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa); - tvcpu->arch.vpa = NULL; + len -= len % sizeof(struct dtl_entry); + + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 6: /* unregister DTL */ - if (!tvcpu->arch.dtl) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); - tvcpu->arch.dtl = NULL; + + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_REG_SLB: /* register SLB shadow buffer */ + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 7: /* unregister SLB shadow buffer */ - if (!tvcpu->arch.slb_shadow) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = NULL; + + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; + + case H_VPA_DEREG_VPA: /* deregister VPA */ + /* Check they don't still have a DTL or SLB buf registered */ + err = H_RESOURCE; + if (vpa_is_registered(&tvcpu->arch.dtl) || + vpa_is_registered(&tvcpu->arch.slb_shadow)) break; - } + + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case 
H_VPA_DEREG_DTL: /* deregister DTL */ + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */ + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; } - return H_SUCCESS; - out_unpin: - kvmppc_unpin_guest_page(kvm, va); + if (vpap) { + vpap->next_gpa = vpa; + vpap->len = len; + vpap->update_pending = 1; + } + + spin_unlock(&tvcpu->arch.vpa_update_lock); + return err; } +static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap) +{ + void *va; + unsigned long nb; + + vpap->update_pending = 0; + va = NULL; + if (vpap->next_gpa) { + va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + if (nb < vpap->len) { + /* + * If it's now too short, it must be that userspace + * has changed the mappings underlying guest memory, + * so unregister the region. + */ + kvmppc_unpin_guest_page(kvm, va); + va = NULL; + } + } + if (vpap->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + vpap->pinned_addr = va; + if (va) + vpap->pinned_end = va + vpap->len; +} + +static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.vpa.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.vpa); + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + } + if (vcpu->arch.dtl.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.dtl); + vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; + vcpu->arch.dtl_index = 0; + } + if (vcpu->arch.slb_shadow.update_pending) + kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow); + spin_unlock(&vcpu->arch.vpa_update_lock); +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -468,6 +549,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) /* default to host PVR, since we can't spoof it */ vcpu->arch.pvr = mfspr(SPRN_PVR); kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + spin_lock_init(&vcpu->arch.vpa_update_lock); 
kvmppc_mmu_book3s_hv_init(vcpu); @@ -512,12 +594,14 @@ out: void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - if (vcpu->arch.dtl) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); - if (vcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); - if (vcpu->arch.vpa) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.dtl.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); + if (vcpu->arch.slb_shadow.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -722,8 +806,13 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->in_guest = 0; vc->pcpu = smp_processor_id(); vc->napping_threads = 0; - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); + if (vcpu->arch.vpa.update_pending || + vcpu->arch.slb_shadow.update_pending || + vcpu->arch.dtl.update_pending) + kvmppc_update_vpas(vcpu); + } /* Grab any remaining hw threads so they can't go into the kernel */ for (i = ptid; i < threads_per_core; ++i) kvmppc_grab_hwthread(vc->pcpu + i); -- cgit v1.2.3 From 7657f4089b097846cc37bfa2b74fc0bd2bd60e30 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Mar 2012 21:42:25 +0000 Subject: KVM: PPC: Book 3S: Fix compilation for !HV configs Commits 2f5cdd5487 ("KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs") and 1c2066b0f7 ("KVM: PPC: Book3S HV: Make virtual processor area registration more robust") added fields to struct kvm_vcpu_arch inside #ifdef CONFIG_KVM_BOOK3S_64_HV regions, and added lines to arch/powerpc/kernel/asm-offsets.c to generate assembler constants for their 
offsets. Unfortunately this led to compile errors on Book 3S machines for configs that had KVM enabled but not CONFIG_KVM_BOOK3S_64_HV. This fixes the problem by moving the offending lines inside #ifdef CONFIG_KVM_BOOK3S_64_HV regions. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 502e038f8c8a..694af3ebb0e4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -450,6 +450,7 @@ int main(void) DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -466,7 +467,6 @@ int main(void) DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); @@ -540,10 +540,10 @@ int main(void) HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); - HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); - HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); #ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); + HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, 
kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); -- cgit v1.2.3 From 6e35994d1f6831af1e5577e28c363c9137d7d597 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 18 Apr 2012 06:01:19 +0000 Subject: KVM: PPC: Use clockevent multiplier and shifter for decrementer Time for which the hrtimer is started for decrementer emulation is calculated using tb_ticks_per_usec. While hrtimer uses the clockevent for DEC reprogramming (if needed) and which calculate timebase ticks using the multiplier and shifter mechanism implemented within clockevent layer. It was observed that this conversion (timebase->time->timebase) are not correct because the mechanism are not consistent. In our setup it adds 2% jitter. With this patch clockevent multiplier and shifter mechanism are used when starting hrtimer for decrementer emulation. Now the jitter is < 0.5%. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/time.h | 1 + arch/powerpc/kernel/time.c | 3 ++- arch/powerpc/kvm/emulate.c | 9 +++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2136f58a54e8..3b4b4a8da922 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -23,6 +23,7 @@ extern unsigned long tb_ticks_per_jiffy; extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; +extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2c42cd72d0f5..99a995c2a3f2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt, static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); -static struct clock_event_device decrementer_clockevent = { +struct 
clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, .irq = 0, @@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = { .set_mode = decrementer_set_mode, .features = CLOCK_EVT_FEAT_ONESHOT, }; +EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index afc9154f1aef..b5872f61a213 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -104,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) */ dec_time = vcpu->arch.dec; - dec_time *= 1000; - do_div(dec_time, tb_ticks_per_usec); + /* + * Guest timebase ticks at the same frequency as host decrementer. + * So use the host decrementer calculations for decrementer emulation. + */ + dec_time = dec_time << decrementer_clockevent.shift; + do_div(dec_time, decrementer_clockevent.mult); dec_nsec = do_div(dec_time, NSEC_PER_SEC); hrtimer_start(&vcpu->arch.dec_timer, ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); -- cgit v1.2.3 From 5b74716ebab10e7bce960d148fe6d8f6920451e5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 26 Apr 2012 19:43:42 +0000 Subject: kvm/powerpc: Add new ioctl to retreive server MMU infos This is necessary for qemu to be able to pass the right information to the guest, such as the supported page sizes and corresponding encodings in the SLB and hash table, which can vary depending on the processor type, the type of KVM used (PR vs HV) and the version of KVM Signed-off-by: Benjamin Herrenschmidt [agraf: fix compilation on hv, adjust for newer ioctl numbers] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 70 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kernel/ppc_ksyms.c | 4 +++ arch/powerpc/kvm/book3s_hv.c | 
32 +++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 25 ++++++++++++++ arch/powerpc/kvm/powerpc.c | 18 +++++++++- include/linux/kvm.h | 27 +++++++++++++++ 7 files changed, 177 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index eb62761b7683..930126698a0f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1860,6 +1860,76 @@ See KVM_GET_PIT2 for details on struct kvm_pit_state2. This IOCTL replaces the obsolete KVM_SET_PIT. +4.74 KVM_PPC_GET_SMMU_INFO + +Capability: KVM_CAP_PPC_GET_SMMU_INFO +Architectures: powerpc +Type: vm ioctl +Parameters: None +Returns: 0 on success, -1 on error + +This populates and returns a structure describing the features of +the "Server" class MMU emulation supported by KVM. +This can in turn be used by userspace to generate the appropriate +device-tree properties for the guest operating system. + +The structure contains some global information, followed by an +array of supported segment page sizes: + + struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; + }; + +The supported flags are: + + - KVM_PPC_PAGE_SIZES_REAL: + When that flag is set, guest page sizes must "fit" the backing + store page sizes. When not set, any page size in the list can + be used regardless of how they are backed by userspace. + + - KVM_PPC_1T_SEGMENTS + The emulated MMU supports 1T segments in addition to the + standard 256M ones. + +The "slb_size" field indicates how many SLB entries are supported + +The "sps" array contains 8 entries indicating the supported base +page sizes for a segment in increasing order.
Each entry is defined +as follows: + + struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; + }; + +An entry with a "page_shift" of 0 is unused. Because the array is +organized in increasing order, a lookup can stop when encountering +such an entry. + +The "slb_enc" field provides the encoding to use in the SLB for the +page size. The bits are in positions such that the value can directly +be OR'ed into the "vsid" argument of the slbmte instruction. + +The "enc" array is a list which for each of those segment base page +size provides the list of supported actual page sizes (which can be +only larger or equal to the base page size), along with the +corresponding encoding in the hash PTE. Similarly, the array is +8 entries sorted by increasing sizes and an entry with a "0" shift +is an empty entry and a terminator: + + struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ + }; + +The "pte_enc" field provides a value that can be OR'ed into the hash +PTE's RPN field (ie, it needs to be shifted left by 12 to OR it +into the hash PTE second double word). + 5.
The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c1069f63dcaf..c87e3b503fdc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -140,6 +140,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, + struct kvm_ppc_smmu_info *info); extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 786a2700ec2d..d1f2aafcbe8c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -190,3 +190,7 @@ EXPORT_SYMBOL(__arch_hweight16); EXPORT_SYMBOL(__arch_hweight32); EXPORT_SYMBOL(__arch_hweight64); #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +EXPORT_SYMBOL_GPL(mmu_psize_defs); +#endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 59c296743595..bb5a0f4b4bbb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1175,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, + int linux_psize) +{ + struct mmu_psize_def *def = &mmu_psize_defs[linux_psize]; + + if (!def->shift) + return; + (*sps)->page_shift = def->shift; + (*sps)->slb_enc = def->sllp; + (*sps)->enc[0].page_shift = def->shift; + (*sps)->enc[0].pte_enc = def->penc; + (*sps)++; +} + +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + struct kvm_ppc_one_seg_page_size *sps; + + info->flags = KVM_PPC_PAGE_SIZES_REAL; + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + info->flags |= KVM_PPC_1T_SEGMENTS; + info->slb_size = mmu_slb_size; + + /* We only 
support these sizes for now, and no muti-size segments */ + sps = &info->sps[0]; + kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M); + + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 815ac5938a9e..a1baec340f7e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1158,6 +1158,31 @@ out: return r; } +#ifdef CONFIG_PPC64 +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + /* No flags */ + info->flags = 0; + + /* SLB is always 64 entries */ + info->slb_size = 64; + + /* Standard 4k base page size segment */ + info->sps[0].page_shift = 12; + info->sps[0].slb_enc = 0; + info->sps[0].enc[0].page_shift = 12; + info->sps[0].enc[0].pte_enc = 0; + + /* Standard 16M large page size segment */ + info->sps[1].page_shift = 24; + info->sps[1].slb_enc = SLB_VSID_L; + info->sps[1].enc[0].page_shift = 24; + info->sps[1].enc[0].pte_enc = 0; + + return 0; +} +#endif /* CONFIG_PPC64 */ + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6ac31154d170..1493c8de947b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -279,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_CAP_PPC_GET_SMMU_INFO: + r = 1; + break; +#endif default: r = 0; break; @@ -718,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } #endif - default: r = -EINVAL; } @@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp, } #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_PPC_GET_SMMU_INFO: { + struct kvm *kvm = filp->private_data; + struct kvm_ppc_smmu_info info; + + 
memset(&info, 0, sizeof(info)); + r = kvm_vm_ioctl_get_smmu_info(kvm, &info); + if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) + r = -EFAULT; + break; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 225b452e1d1d..8d696cf6edcc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo { __u8 pad[108]; }; +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -591,6 +615,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PCI_2_3 75 #define KVM_CAP_KVMCLOCK_CTRL 76 #define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 #ifdef KVM_CAP_IRQ_ROUTING @@ -800,6 +825,8 @@ struct kvm_s390_ucas_mapping { struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* * ioctls for vcpu fds -- cgit v1.2.3