Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r-- | arch/arm64/kvm/arm.c | 141
1 file changed, 101 insertions, 40 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 646e806c6ca6..68fec8c95fee 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -125,6 +125,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		}
 		mutex_unlock(&kvm->slots_lock);
 		break;
+	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
+		mutex_lock(&kvm->lock);
+		if (!kvm->created_vcpus) {
+			r = 0;
+			set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
+		}
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		break;
 	}
@@ -313,6 +321,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_SYSTEM_SUSPEND:
 	case KVM_CAP_IRQFD_RESAMPLE:
 	case KVM_CAP_COUNTER_OFFSET:
+	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
@@ -366,7 +375,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = get_num_wrps();
 		break;
 	case KVM_CAP_ARM_PMU_V3:
-		r = kvm_arm_support_pmu_v3();
+		r = kvm_supports_guest_pmuv3();
 		break;
 	case KVM_CAP_ARM_INJECT_SERROR_ESR:
 		r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
@@ -466,7 +475,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		return err;
 
-	return kvm_share_hyp(vcpu, vcpu + 1);
+	err = kvm_share_hyp(vcpu, vcpu + 1);
+	if (err)
+		kvm_vgic_vcpu_destroy(vcpu);
+
+	return err;
 }
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -560,6 +573,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
 	/*
+	 * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
+	 * which happens eagerly in VHE.
+	 *
+	 * Also, the VMID allocator only preserves VMIDs that are active at the
+	 * time of rollover, so KVM might need to grab a new VMID for the MMU if
+	 * this is called from kvm_sched_in().
+	 */
+	kvm_arm_vmid_update(&mmu->vmid);
+
+	/*
 	 * We guarantee that both TLBs and I-cache are private to each
 	 * vcpu. If detecting that a vcpu from the same VM has
 	 * previously run on the same physical CPU, call into the
@@ -576,8 +599,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 nommu:
 	vcpu->cpu = cpu;
 
-	kvm_vgic_load(vcpu);
+	/*
+	 * The timer must be loaded before the vgic to correctly set up physical
+	 * interrupt deactivation in nested state (e.g. timer interrupt).
+	 */
 	kvm_timer_vcpu_load(vcpu);
+	kvm_vgic_load(vcpu);
 	kvm_vcpu_load_debug(vcpu);
 	if (has_vhe())
 		kvm_vcpu_load_vhe(vcpu);
@@ -815,6 +842,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	if (vcpu_has_nv(vcpu)) {
+		ret = kvm_vgic_vcpu_nv_init(vcpu);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * This needs to happen after any restriction has been applied
 	 * to the feature set.
@@ -825,14 +858,20 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
-	ret = kvm_arm_pmu_v3_enable(vcpu);
-	if (ret)
-		return ret;
+	if (kvm_vcpu_has_pmu(vcpu)) {
+		ret = kvm_arm_pmu_v3_enable(vcpu);
+		if (ret)
+			return ret;
+	}
 
 	if (is_protected_kvm_enabled()) {
 		ret = pkvm_create_hyp_vm(kvm);
 		if (ret)
 			return ret;
+
+		ret = pkvm_create_hyp_vcpu(vcpu);
+		if (ret)
+			return ret;
 	}
 
 	mutex_lock(&kvm->arch.config_lock);
@@ -1138,19 +1177,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		preempt_disable();
 
-		/*
-		 * The VMID allocator only tracks active VMIDs per
-		 * physical CPU, and therefore the VMID allocated may not be
-		 * preserved on VMID roll-over if the task was preempted,
-		 * making a thread's VMID inactive. So we need to call
-		 * kvm_arm_vmid_update() in non-premptible context.
-		 */
-		if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
-		    has_vhe())
-			__load_stage2(vcpu->arch.hw_mmu,
-				      vcpu->arch.hw_mmu->arch);
-
-		kvm_pmu_flush_hwstate(vcpu);
+		if (kvm_vcpu_has_pmu(vcpu))
+			kvm_pmu_flush_hwstate(vcpu);
 
 		local_irq_disable();
 
@@ -1169,7 +1197,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			isb(); /* Ensure work in x_flush_hwstate is committed */
-			kvm_pmu_sync_hwstate(vcpu);
+			if (kvm_vcpu_has_pmu(vcpu))
+				kvm_pmu_sync_hwstate(vcpu);
 			if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 				kvm_timer_sync_user(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
@@ -1199,7 +1228,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 * that the vgic can properly sample the updated state of the
 		 * interrupt line.
 		 */
-		kvm_pmu_sync_hwstate(vcpu);
+		if (kvm_vcpu_has_pmu(vcpu))
+			kvm_pmu_sync_hwstate(vcpu);
 
 		/*
 		 * Sync the vgic state before syncing the timer state because
@@ -1388,7 +1418,7 @@ static unsigned long system_supported_vcpu_features(void)
 	if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
 		clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
 
-	if (!kvm_arm_support_pmu_v3())
+	if (!kvm_supports_guest_pmuv3())
 		clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
 
 	if (!system_supports_sve())
@@ -1980,7 +2010,7 @@ static int kvm_init_vector_slots(void)
 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 {
 	struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
-	unsigned long tcr, ips;
+	unsigned long tcr;
 
 	/*
 	 * Calculate the raw per-cpu offset without a translation from the
@@ -1994,19 +2024,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 	params->mair_el2 = read_sysreg(mair_el1);
 
 	tcr = read_sysreg(tcr_el1);
-	ips = FIELD_GET(TCR_IPS_MASK, tcr);
 	if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+		tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK);
 		tcr |= TCR_EPD1_MASK;
 	} else {
+		unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr);
+
 		tcr &= TCR_EL2_MASK;
-		tcr |= TCR_EL2_RES1;
+		tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips);
+		if (lpa2_is_enabled())
+			tcr |= TCR_EL2_DS;
 	}
-	tcr &= ~TCR_T0SZ_MASK;
 	tcr |= TCR_T0SZ(hyp_va_bits);
-	tcr &= ~TCR_EL2_PS_MASK;
-	tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips);
-	if (lpa2_is_enabled())
-		tcr |= TCR_EL2_DS;
 	params->tcr_el2 = tcr;
 
 	params->pgd_pa = kvm_mmu_get_httbr();
@@ -2290,6 +2319,19 @@ static int __init init_subsystems(void)
 		break;
 	case -ENODEV:
 	case -ENXIO:
+		/*
+		 * No VGIC? No pKVM for you.
+		 *
+		 * Protected mode assumes that VGICv3 is present, so no point
+		 * in trying to hobble along if vgic initialization fails.
+		 */
+		if (is_protected_kvm_enabled())
+			goto out;
+
+		/*
+		 * Otherwise, userspace could choose to implement a GIC for its
+		 * guest on non-cooperative hardware.
+		 */
 		vgic_present = false;
 		err = 0;
 		break;
@@ -2297,6 +2339,13 @@ static int __init init_subsystems(void)
 		goto out;
 	}
 
+	if (kvm_mode == KVM_MODE_NV &&
+	   !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
+		kvm_err("NV support requires GICv3, giving up\n");
+		err = -EINVAL;
+		goto out;
+	}
+
 	/*
 	 * Init HYP architected timer support
 	 */
@@ -2400,6 +2449,13 @@ static void kvm_hyp_init_symbols(void)
 	kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
 	kvm_nvhe_sym(__icache_flags) = __icache_flags;
 	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+
+	/*
+	 * Flush entire BSS since part of its data containing init symbols is read
+	 * while the MMU is off.
+	 */
+	kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
+				kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
 }
 
 static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
@@ -2461,14 +2517,6 @@ static void finalize_init_hyp_mode(void)
 			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
 				kern_hyp_va(sve_state);
 		}
-	} else {
-		for_each_possible_cpu(cpu) {
-			struct user_fpsimd_state *fpsimd_state;
-
-			fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
-			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
-				kern_hyp_va(fpsimd_state);
-		}
 	}
 }
 
@@ -2705,6 +2753,14 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
+	struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+	/*
+	 * The only thing we have a chance of directly-injecting is LPIs. Maybe
+	 * one day...
+	 */
+	if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+		return 0;
 
 	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
 					  &irqfd->irq_entry);
@@ -2714,6 +2770,10 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
+	struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+	if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+		return;
 
 	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
 				     &irqfd->irq_entry);
@@ -2794,11 +2854,12 @@ static __init int kvm_arm_init(void)
 	if (err)
 		goto out_hyp;
 
-	kvm_info("%s%sVHE mode initialized successfully\n",
+	kvm_info("%s%sVHE%s mode initialized successfully\n",
 		 in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
 				     "Protected " : "Hyp "),
 		 in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
-				     "h" : "n"));
+				     "h" : "n"),
+		 cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": "");
 
 	/*
 	 * FIXME: Do something reasonable if kvm_init() fails after pKVM
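For context (not part of the diff above): a minimal sketch of how a VMM could opt into the new KVM_CAP_ARM_WRITABLE_IMP_ID_REGS capability introduced here, assuming <linux/kvm.h> headers that already define it. As the kvm_vm_ioctl_enable_cap() hunk shows, the cap is only accepted on the VM fd while kvm->created_vcpus is still zero, so it must be enabled before the first KVM_CREATE_VCPU.

/*
 * Illustrative sketch only -- not taken from the patch. Assumes the kernel
 * headers define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS; error handling is minimal.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_writable_imp_id_regs(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ARM_WRITABLE_IMP_ID_REGS,
	};

	/* KVM_CHECK_EXTENSION returns > 0 if the VM supports the cap. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS) <= 0)
		return -1;

	/* Must be done before the first KVM_CREATE_VCPU on this VM. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}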