diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 18:38:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 18:38:31 -0700 |
commit | c136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch) | |
tree | 985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9 /virt | |
parent | e0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff) | |
parent | 1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff) | |
download | lwn-c136b84393d4e340e1b53fc7f737dd5827b19ee5.tar.gz lwn-c136b84393d4e340e1b53fc7f737dd5827b19ee5.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"PPC:
- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts pending.
ARM:
- VCPU request overhaul
- allow timer and PMU to have their interrupt number selected from userspace
- workaround for Cavium erratum 30115
- handling of memory poisonning
- the usual crop of fixes and cleanups
s390:
- initial machine check forwarding
- migration support for the CMMA page hinting information
- cleanups and fixes
x86:
- nested VMX bugfixes and improvements
- more reliable NMI window detection on AMD
- APIC timer optimizations
Generic:
- VCPU request overhaul + documentation of common code patterns
- kvm_stat improvements"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
Update my email address
kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
kvm: x86: mmu: allow A/D bits to be disabled in an mmu
x86: kvm: mmu: make spte mmio mask more explicit
x86: kvm: mmu: dead code thanks to access tracking
KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
KVM: x86: remove ignored type attribute
KVM: LAPIC: Fix lapic timer injection delay
KVM: lapic: reorganize restart_apic_timer
KVM: lapic: reorganize start_hv_timer
kvm: nVMX: Check memory operand to INVVPID
KVM: s390: Inject machine check into the nested guest
KVM: s390: Inject machine check into the guest
tools/kvm_stat: add new interactive command 'b'
tools/kvm_stat: add new command line switch '-i'
tools/kvm_stat: fix error on interactive command 'g'
KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/aarch32.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 139 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 82 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v3-sr.c | 823 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 23 | ||||
-rw-r--r-- | virt/kvm/arm/pmu.c | 117 | ||||
-rw-r--r-- | virt/kvm/arm/psci.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-irqfd.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v2.c | 24 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 22 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 68 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h | 12 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 45 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 68 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 12 |
15 files changed, 1297 insertions, 150 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 528af4b2d09e..79c7c357804b 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5976609ef27c..8e89d63005c7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -21,6 +21,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/uaccess.h> #include <clocksource/arm_arch_timer.h> #include <asm/arch_timer.h> @@ -35,6 +36,16 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; static u32 host_vtimer_irq_flags; +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { vcpu_vtimer(vcpu)->active_cleared_last = false; @@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. */ - kvm_vcpu_kick(vcpu); + kvm_vcpu_wake_up(vcpu); } static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) @@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, if (likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); + timer_ctx->irq.level, + timer_ctx); WARN_ON(ret); } } @@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); } -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* - * The vcpu timer irq number cannot be determined in - * kvm_timer_vcpu_init() because it is called much before - * kvm_vcpu_set_target(). To handle this, we determine - * vcpu timer irq number when the vcpu is reset. - */ - vtimer->irq.irq = virt_irq->irq; - ptimer->irq.irq = phys_irq->irq; - - /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 * and to 0 for ARMv7. We provide an implementation that always * resets the timer to be disabled and unmasked and is compliant with @@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); @@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; } static void kvm_timer_init_interrupt(void *info) @@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } +static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) +{ + int vtimer_irq, ptimer_irq; + int i, ret; + + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); + if (ret) + return false; + + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); + if (ret) + return false; + + kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; + if (!timer_irqs_are_valid(vcpu)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void) val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); write_sysreg(val, cnthctl_el2); } + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3417e184c8e1..a39a1e161e63 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); } +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu->arch.power_off = false; break; case KVM_MP_STATE_STOPPED: - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); break; default: return -EINVAL; @@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) } ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); return ret; } @@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) vcpu->arch.pause = true; - kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); -} - -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pause = true; - kvm_vcpu_kick(vcpu); -} - -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu) -{ - struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - - vcpu->arch.pause = false; - swake_up(wq); + kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); } void kvm_arm_resume_guest(struct kvm *kvm) @@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm) int i; struct kvm_vcpu *vcpu; - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arm_resume_vcpu(vcpu); + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.pause = false; + swake_up(kvm_arch_vcpu_wq(vcpu)); + } } -static void vcpu_sleep(struct kvm_vcpu *vcpu) +static void vcpu_req_sleep(struct kvm_vcpu *vcpu) { struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* Awaken to handle a signal, request we sleep again later. */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) return vcpu->arch.target >= 0; } +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) + vcpu_req_sleep(vcpu); + + /* + * Clear IRQ_PENDING requests that were made to guarantee + * that a VCPU sees new virtual interrupts. + */ + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + } +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); - if (vcpu->arch.power_off || vcpu->arch.pause) - vcpu_sleep(vcpu); + check_vcpu_requests(vcpu); /* * Preparing the interrupts to be injected also @@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_INTR; } + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. + * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virtual/kvm/vcpu-requests.rst + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || - vcpu->arch.power_off || vcpu->arch.pause) { + kvm_request_pending(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); @@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); @@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) * trigger a world-switch round on the running physical CPU to set the * virtual IRQ/FIQ fields in the HCR appropriately. */ + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return 0; @@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); } return -EINVAL; @@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); else vcpu->arch.power_off = false; @@ -1115,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - if (is_kernel_in_hyp_mode()) - kvm_timer_init_vhe(); - kvm_arm_init_debug(); } @@ -1137,6 +1160,7 @@ static void cpu_hyp_reinit(void) * event was cancelled before the CPU was reset. */ __cpu_init_stage2(); + kvm_timer_init_vhe(); } else { cpu_init_hyp_mode(NULL); } diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 87940364570b..91728faa13fd 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -19,10 +19,12 @@ #include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { @@ -118,6 +120,90 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } +static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP0R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP0R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP0R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP0R3_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP1R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP1R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP1R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP1R3_EL2); + break; + } +} + +static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP0R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP0R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP0R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP0R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP1R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP1R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP1R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP1R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; @@ -154,24 +240,27 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) switch (nr_pre_bits) { case 7: - cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); - cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); case 6: - cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); default: - cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); } switch (nr_pre_bits) { case 7: - cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); - cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); case 6: - cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); default: - cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(0, ICH_HCR_EL2); + cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; @@ -224,26 +313,34 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) switch (nr_pre_bits) { case 7: - write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); - write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); case 6: - write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); default: - write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); } switch (nr_pre_bits) { case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); } for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } else { + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } /* @@ -287,3 +384,697 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } + +#ifdef CONFIG_ARM64 + +static int __hyp_text __vgic_v3_bpr_min(void) +{ + /* See Pseudocode for VPriorityGroup */ + return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); +} + +static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, + u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? */ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, + int intid, u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + +static int __hyp_text __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +{ + return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +{ + unsigned int bpr; + + if (vmcr & ICH_VMCR_CBPR_MASK) { + bpr = __vgic_v3_get_bpr0(vmcr); + if (bpr < 7) + bpr++; + } else { + bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + } + + return bpr; +} + +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minumal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. + */ +static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? __ffs(ap1) : 32; + + /* Always clear the LSB, which is the highest priority */ + if (c0 < c1) { + ap0 &= ~BIT(c0); + __vgic_v3_write_ap0rn(ap0, i); + hap += c0; + } else { + ap1 &= ~BIT(c1); + __vgic_v3_write_ap1rn(ap1, i); + hap += c1; + } + + /* Rescale to 8 bits of priority */ + return hap << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 lr_val; + u8 lr_prio, pmr; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr < 0) + goto spurious; + + if (grp != !!(lr_val & ICH_LR_GROUP)) + goto spurious; + + pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + if (pmr <= lr_prio) + goto spurious; + + if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) + goto spurious; + + lr_val &= ~ICH_LR_STATE; + /* No active state for LPIs */ + if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) + lr_val |= ICH_LR_ACTIVE_BIT; + __gic_v3_set_lr(lr_val, lr); + __vgic_v3_set_active_priority(lr_prio, vmcr, grp); + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); + return; + +spurious: + vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); +} + +static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +{ + lr_val &= ~ICH_LR_ACTIVE_BIT; + if (lr_val & ICH_LR_HW) { + u32 pid; + + pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; + gic_write_dir(pid); + } + + __gic_v3_set_lr(lr_val, lr); +} + +static void __hyp_text __vgic_v3_bump_eoicount(void) +{ + u32 hcr; + + hcr = read_gicreg(ICH_HCR_EL2); + hcr += 1 << ICH_HCR_EOIcount_SHIFT; + write_gicreg(hcr, ICH_HCR_EL2); +} + +static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + int lr; + + /* EOImode == 0, nothing to be done here */ + if (!(vmcr & ICH_VMCR_EOIM_MASK)) + return; + + /* No deactivate to be performed on an LPI */ + if (vid >= VGIC_MIN_LPI) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + u8 lr_prio, act_prio; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + /* Drop priority in any case */ + act_prio = __vgic_v3_clear_highest_active_priority(); + + /* If EOIing an LPI, no deactivate to be performed */ + if (vid >= VGIC_MIN_LPI) + return; + + /* EOImode == 1, nothing to be done here */ + if (vmcr & ICH_VMCR_EOIM_MASK) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* If priorities or group do not match, the guest has fscked-up. */ + if (grp != !!(lr_val & ICH_LR_GROUP) || + __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) + return; + + /* Let's now perform the deactivation */ + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); +} + +static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); +} + +static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG0_MASK; + else + vmcr &= ~ICH_VMCR_ENG0_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG1_MASK; + else + vmcr &= ~ICH_VMCR_ENG1_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); +} + +static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); +} + +static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min() - 1; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR0_SHIFT; + val &= ICH_VMCR_BPR0_MASK; + vmcr &= ~ICH_VMCR_BPR0_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min(); + + if (vmcr & ICH_VMCR_CBPR_MASK) + return; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR1_SHIFT; + val &= ICH_VMCR_BPR1_MASK; + vmcr &= ~ICH_VMCR_BPR1_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val; + + if (!__vgic_v3_get_group(vcpu)) + val = __vgic_v3_read_ap0rn(n); + else + val = __vgic_v3_read_ap1rn(n); + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (!__vgic_v3_get_group(vcpu)) + __vgic_v3_write_ap0rn(val, n); + else + __vgic_v3_write_ap1rn(val, n); +} + +static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u64 lr_val; + int lr, lr_grp, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr == -1) + goto spurious; + + lr_grp = !!(lr_val & ICH_LR_GROUP); + if (lr_grp != grp) + lr_val = ICC_IAR1_EL1_SPURIOUS; + +spurious: + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); +} + +static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + vmcr &= ICH_VMCR_PMR_MASK; + vmcr >>= ICH_VMCR_PMR_SHIFT; + vcpu_set_reg(vcpu, rt, vmcr); +} + +static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + val <<= ICH_VMCR_PMR_SHIFT; + val &= ICH_VMCR_PMR_MASK; + vmcr &= ~ICH_VMCR_PMR_MASK; + vmcr |= val; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = __vgic_v3_get_highest_active_priority(); + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vtr, val; + + vtr = read_gicreg(ICH_VTR_EL2); + /* PRIbits */ + val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + /* IDbits */ + val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; + /* A3V */ + val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; + /* EOImode */ + val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + /* CBPR */ + val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (val & ICC_CTLR_EL1_CBPR_MASK) + vmcr |= ICH_VMCR_CBPR_MASK; + else + vmcr &= ~ICH_VMCR_CBPR_MASK; + + if (val & ICC_CTLR_EL1_EOImode_MASK) + vmcr |= ICH_VMCR_EOIM_MASK; + else + vmcr &= ~ICH_VMCR_EOIM_MASK; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + int rt; + u32 esr; + u32 vmcr; + void (*fn)(struct kvm_vcpu *, u32, int); + bool is_read; + u32 sysreg; + + esr = kvm_vcpu_get_hsr(vcpu); + if (vcpu_mode_is_32bit(vcpu)) { + if (!kvm_condition_valid(vcpu)) + return 1; + + sysreg = esr_cp15_to_sysreg(esr); + } else { + sysreg = esr_sys64_to_sysreg(esr); + } + + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + switch (sysreg) { + case SYS_ICC_IAR0_EL1: + case SYS_ICC_IAR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_iar; + break; + case SYS_ICC_EOIR0_EL1: + case SYS_ICC_EOIR1_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_eoir; + break; + case SYS_ICC_IGRPEN1_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen1; + else + fn = __vgic_v3_write_igrpen1; + break; + case SYS_ICC_BPR1_EL1: + if (is_read) + fn = __vgic_v3_read_bpr1; + else + fn = __vgic_v3_write_bpr1; + break; + case SYS_ICC_AP0Rn_EL1(0): + case SYS_ICC_AP1Rn_EL1(0): + if (is_read) + fn = __vgic_v3_read_apxr0; + else + fn = __vgic_v3_write_apxr0; + break; + case SYS_ICC_AP0Rn_EL1(1): + case SYS_ICC_AP1Rn_EL1(1): + if (is_read) + fn = __vgic_v3_read_apxr1; + else + fn = __vgic_v3_write_apxr1; + break; + case SYS_ICC_AP0Rn_EL1(2): + case SYS_ICC_AP1Rn_EL1(2): + if (is_read) + fn = __vgic_v3_read_apxr2; + else + fn = __vgic_v3_write_apxr2; + break; + case SYS_ICC_AP0Rn_EL1(3): + case SYS_ICC_AP1Rn_EL1(3): + if (is_read) + fn = __vgic_v3_read_apxr3; + else + fn = __vgic_v3_write_apxr3; + break; + case SYS_ICC_HPPIR0_EL1: + case SYS_ICC_HPPIR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_hppir; + break; + case SYS_ICC_IGRPEN0_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen0; + else + fn = __vgic_v3_write_igrpen0; + break; + case SYS_ICC_BPR0_EL1: + if (is_read) + fn = __vgic_v3_read_bpr0; + else + fn = __vgic_v3_write_bpr0; + break; + case SYS_ICC_DIR_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_dir; + break; + case SYS_ICC_RPR_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_rpr; + break; + case SYS_ICC_CTLR_EL1: + if (is_read) + fn = __vgic_v3_read_ctlr; + else + fn = __vgic_v3_write_ctlr; + break; + case SYS_ICC_PMR_EL1: + if (is_read) + fn = __vgic_v3_read_pmr; + else + fn = __vgic_v3_write_pmr; + break; + default: + return 0; + } + + vmcr = __vgic_v3_read_vmcr(); + rt = kvm_vcpu_sys_get_rt(vcpu); + fn(vcpu, vmcr, rt); + + return 1; +} + +#endif diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1c44aa35f909..0e1fc75f3585 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -20,6 +20,7 @@ #include <linux/kvm_host.h> #include <linux/io.h> #include <linux/hugetlb.h> +#include <linux/sched/signal.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> @@ -1262,6 +1263,24 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, __coherent_cache_guest_page(vcpu, pfn, size); } +static void kvm_send_hwpoison_signal(unsigned long address, + struct vm_area_struct *vma) +{ + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_MCEERR_AR; + info.si_addr = (void __user *)address; + + if (is_vm_hugetlb_page(vma)) + info.si_addr_lsb = huge_page_shift(hstate_vma(vma)); + else + info.si_addr_lsb = PAGE_SHIFT; + + send_sig_info(SIGBUS, &info, current); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1331,6 +1350,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, smp_rmb(); pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); + if (pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(hva, vma); + return 0; + } if (is_error_noslot_pfn(pfn)) return -EFAULT; diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4b43e7f3b158..fc8a723ff387 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } +static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool overflow = !!kvm_pmu_overflow_status(vcpu); + + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow, + &vcpu->arch.pmu); + WARN_ON(ret); + } +} + /** * kvm_pmu_overflow_set - set PMU overflow interrupt * @vcpu: The vcpu pointer @@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) */ void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) { - u64 reg; - if (val == 0) return; vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - reg = kvm_pmu_overflow_status(vcpu); - if (reg != 0) - kvm_vcpu_kick(vcpu); + kvm_pmu_check_overflow(vcpu); } static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) return; - - overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level == overflow) - return; - - pmu->irq_level = overflow; - - if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret; - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); - WARN_ON(ret); - } + kvm_pmu_check_overflow(vcpu); } bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) @@ -451,34 +450,74 @@ bool kvm_arm_support_pmu_v3(void) return (perf_num_counters() > 0); } -static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!kvm_arm_support_pmu_v3()) - return -ENODEV; + if (!vcpu->arch.pmu.created) + return 0; /* - * We currently require an in-kernel VGIC to use the PMU emulation, - * because we do not support forwarding PMU overflow interrupts to - * userspace yet. + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to not have an in-kernel GIC and not set an IRQ. */ - if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) return -ENODEV; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || - !kvm_arm_pmu_irq_initialized(vcpu)) + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENXIO; - if (kvm_arm_pmu_v3_ready(vcpu)) + if (vcpu->arch.pmu.created) return -EBUSY; - kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; + if (irqchip_in_kernel(vcpu->kvm)) { + int ret; + + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, + &vcpu->arch.pmu); + if (ret) + return ret; + } + + vcpu->arch.pmu.created = true; return 0; } -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) - /* * For one VM the interrupt type must be same for each vcpu. * As a PPI, the interrupt number is the same for all vcpus, @@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; @@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EFAULT; /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; if (!pmu_irq_is_valid(vcpu->kvm, irq)) @@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index a08d7a93aebb..f1e363bab5e8 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * for KVM will preserve the register state. */ kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return PSCI_RET_SUCCESS; } @@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); } static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) @@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + kvm_for_each_vcpu(i, tmp, vcpu->kvm) tmp->arch.power_off = true; - kvm_vcpu_kick(tmp); - } + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..b7baf581611a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level); + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); } /** diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 63e0bbdcddcc..37522e65eb53 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, + vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, + vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, + vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, - vgic_mmio_read_target, vgic_mmio_write_target, 8, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, - vgic_mmio_read_config, vgic_mmio_write_config, 2, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 201d5e2e973d..714fa3933546 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { vgic_mmio_read_raz, vgic_mmio_write_wi, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, - vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, - vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, - VGIC_ACCESS_32bit), + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), @@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_sactive, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_cactive, 4, - VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, + 4, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, vgic_mmio_read_priority, vgic_mmio_write_priority, 32, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 1c17b2a2f105..c1e4bdd66131 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -231,56 +231,94 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * be migrated while we don't hold the IRQ locks and we don't want to be * chasing moving targets. * - * For private interrupts, we only have to make sure the single and only VCPU - * that can potentially queue the IRQ is stopped. + * For private interrupts we don't have to do anything because userspace + * accesses to the VGIC state already require all VCPUs to be stopped, and + * only the VCPU itself can modify its private interrupts active state, which + * guarantees that the VCPU is not running. */ static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_halt_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } /* See vgic_change_active_prepare */ static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_resume_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } -void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; - vgic_change_active_prepare(vcpu, intid); for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } - vgic_change_active_finish(vcpu, intid); } -void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; + mutex_lock(&vcpu->kvm->lock); vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); } unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index ea4171acdef3..5693f6df45ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; * The _WITH_LENGTH version instantiates registers with a fixed length * and is mutually exclusive with the _PER_IRQ version. */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ { \ .reg_offset = off, \ .bits_per_irq = bpi, \ @@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .access_flags = acc, \ .read = rd, \ .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ } #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ @@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 030248e669f6..96ea597db0e7 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,6 +21,10 @@ #include "vgic.h" +static bool group0_trap; +static bool group1_trap; +static bool common_trap; + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -258,6 +262,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -429,6 +439,26 @@ out: return ret; } +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + +static int __init early_group0_trap_cfg(char *buf) +{ + return strtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + +static int __init early_group1_trap_cfg(char *buf) +{ + return strtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -480,6 +510,21 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); +#ifdef CONFIG_ARM64 + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } +#endif + + if (group0_trap || group1_trap || common_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + group0_trap ? "G0" : "", + group1_trap ? "G1" : "", + common_trap ? "C" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } + kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 83b24d20ff8f..fed717e07938 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock + * kvm->lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock + * kvm->lpi_list_lock + * vgic_irq->irq_lock * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. @@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) /* * Only valid injection if changing level for level-triggered IRQs or for a - * rising edge. + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. */ -static bool vgic_validate_injection(struct vgic_irq *irq, bool level) +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) { + if (irq->owner != owner) + return false; + switch (irq->config) { case VGIC_CONFIG_LEVEL: return irq->line_level != level; @@ -285,8 +290,10 @@ retry: * won't see this one until it exits for some other * reason. */ - if (vcpu) + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } return false; } @@ -332,6 +339,7 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return true; @@ -346,13 +354,16 @@ retry: * false: to ignore the call * Level-sensitive true: raise the input signal * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. * * The VGIC is not concerned with devices being active-LOW or active-HIGH for * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) + bool level, void *owner) { struct kvm_vcpu *vcpu; struct vgic_irq *irq; @@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, spin_lock(&irq->irq_lock); - if (!vgic_validate_injection(irq, level)) { + if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); vgic_put_irq(kvm, irq); @@ -431,6 +442,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) } /** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + spin_lock(&irq->irq_lock); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + spin_unlock(&irq->irq_lock); + + return ret; +} + +/** * vgic_prune_ap_list - Remove non-relevant interrupts from the list * * @vcpu: The VCPU pointer @@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm) * a good kick... */ kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } } } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0fe9d02f6bb..19f0ecb9b93e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -73,17 +73,17 @@ MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_grow, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_grow); /* Default resets per-vcpu halt_poll_ns . */ unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_shrink, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); /* @@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return PTR_ERR(file); } + /* + * Don't call kvm_put_kvm anymore at this point; file->f_op is + * already set, with ->release() being kvm_vm_release(). In error + * cases it will be called by the final fput(file) and will take + * care of doing kvm_put_kvm(kvm). + */ if (kvm_create_vm_debugfs(kvm, r) < 0) { put_unused_fd(r); fput(file); |