author     Paolo Bonzini <pbonzini@redhat.com>  2024-11-14 07:05:36 -0500
committer  Paolo Bonzini <pbonzini@redhat.com>  2024-11-14 07:05:36 -0500
commit     7b541d557f705c7e5bcd874f3b960c8fb8dee562 (patch)
tree       68139f92a8dfab7dd4e23f44ee65fec910faabc2 /arch/arm64/kvm
parent     b467ab82a9fde4b46c0cd2c299220857afb0e0d4 (diff)
parent     60ad25e14ab5a4e56c8bf7f7d6846eacb9cd53df (diff)
Merge tag 'kvmarm-6.13' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 changes for 6.13, part #1

- Support for stage-1 permission indirection (FEAT_S1PIE) and permission
  overlays (FEAT_S1POE), including nested virt + the emulated page table walker

- Introduce PSCI SYSTEM_OFF2 support to KVM + client driver. This call was
  introduced in PSCIv1.3 as a mechanism to request hibernation, similar to the
  S4 state in ACPI

- Explicitly trap + hide FEAT_MPAM (QoS controls) from KVM guests. As part of
  this, introduce trivial initialization of the host's MPAM context so KVM can
  use the corresponding traps

- PMU support under nested virtualization, honoring the guest hypervisor's
  trap configuration and event filtering when running a nested guest

- Fixes to vgic ITS serialization where stale device/interrupt table entries
  are not zeroed when the mapping is invalidated by the VM

- Avoid emulated MMIO completion if userspace has requested synchronous
  external abort injection

- Various fixes and cleanups affecting pKVM, vCPU initialization, and selftests
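As an aside on the first item above: the sketch below (a standalone illustration, not code from this merge) shows the base-permission lookup that FEAT_S1PIE introduces, mirroring the perm_idx() helper and the set_perms() encoding table added to arch/arm64/kvm/at.c in this series. The 4-bit permission index carried by a block/page descriptor selects one nibble of PIR_ELx (PIRE0_EL1 for the unprivileged view); GCS encodings and the overlay-applicability semantics of bit 3 are left out here.

/*
 * Minimal sketch, not kernel code: decode one FEAT_S1PIE permission nibble.
 * PIR_ELx holds sixteen 4-bit fields; field 'idx' gives the base R/W/X
 * permissions for descriptors carrying permission index 'idx'.
 */
#include <stdbool.h>
#include <stdint.h>

struct s1pie_perms {
	bool r, w, x;
};

static struct s1pie_perms s1pie_decode(uint64_t pir, unsigned int idx)
{
	/* Same mapping as the set_perms() table in at.c (GCS omitted). */
	static const struct s1pie_perms enc[16] = {
		[0x1] = { .r = true },                        /* R         */
		[0x2] = { .x = true },                        /* X         */
		[0x3] = { .r = true, .x = true },             /* RX        */
		[0x5] = { .r = true, .w = true },             /* RW        */
		[0x6] = { .r = true, .w = true, .x = true },  /* RWX + WXN */
		[0x7] = { .r = true, .w = true, .x = true },  /* RWX       */
		[0x8] = { .r = true },                        /* R         */
		[0x9] = { .r = true },                        /* R         */
		[0xa] = { .r = true, .x = true },             /* RX        */
		[0xc] = { .r = true, .w = true },             /* RW        */
		[0xe] = { .r = true, .w = true, .x = true },  /* RWX       */
		/* 0x0, 0x4, 0xb, 0xd, 0xf grant no access */
	};

	/* One nibble per index, lowest nibble first, as in perm_idx(). */
	return enc[(pir >> (idx * 4)) & 0xf];
}

In the emulated walker this lookup is done against PIR_EL1/PIRE0_EL1 or PIR_EL2 depending on the translation regime, and the hierarchical, overlay (FEAT_S1POE) and WXN restrictions are then layered on top.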
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 3
-rw-r--r--  arch/arm64/kvm/arm.c | 26
-rw-r--r--  arch/arm64/kvm/at.c | 470
-rw-r--r--  arch/arm64/kvm/emulate-nested.c | 301
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 31
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 11
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 12
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 116
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c | 20
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c | 3
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 160
-rw-r--r--  arch/arm64/kvm/hypercalls.c | 2
-rw-r--r--  arch/arm64/kvm/mmio.c | 32
-rw-r--r--  arch/arm64/kvm/nested.c | 82
-rw-r--r--  arch/arm64/kvm/pmu-emul.c | 143
-rw-r--r--  arch/arm64/kvm/psci.c | 44
-rw-r--r--  arch/arm64/kvm/reset.c | 5
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 309
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c | 32
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h | 23
23 files changed, 1466 insertions, 365 deletions
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 879982b1cc73..1215df590418 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -206,8 +206,7 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
static inline bool userspace_irqchip(struct kvm *kvm)
{
- return static_branch_unlikely(&userspace_irqchip_in_use) &&
- unlikely(!irqchip_in_kernel(kvm));
+ return unlikely(!irqchip_in_kernel(kvm));
}
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 48cafb65d6ac..a102c3aebdbc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -69,7 +69,6 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
static bool vgic_present, kvm_arm_initialised;
static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);
-DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
bool is_kvm_arm_initialised(void)
{
@@ -503,9 +502,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
- static_branch_dec(&userspace_irqchip_in_use);
-
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@@ -848,22 +844,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
- if (!irqchip_in_kernel(kvm)) {
- /*
- * Tell the rest of the code that there are userspace irqchip
- * VMs in the wild.
- */
- static_branch_inc(&userspace_irqchip_in_use);
- }
-
- /*
- * Initialize traps for protected VMs.
- * NOTE: Move to run in EL2 directly, rather than via a hypercall, once
- * the code is in place for first run initialization at EL2.
- */
- if (kvm_vm_is_protected(kvm))
- kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
-
mutex_lock(&kvm->arch.config_lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
mutex_unlock(&kvm->arch.config_lock);
@@ -1077,7 +1057,7 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
* state gets updated in kvm_timer_update_run and
* kvm_pmu_update_run below).
*/
- if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
if (kvm_timer_should_notify_user(vcpu) ||
kvm_pmu_should_notify_user(vcpu)) {
*ret = -EINTR;
@@ -1199,7 +1179,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
- if (static_branch_unlikely(&userspace_irqchip_in_use))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
@@ -1245,7 +1225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* we don't want vtimer interrupts to race with syncing the
* timer virtual interrupt state.
*/
- if (static_branch_unlikely(&userspace_irqchip_in_use))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
kvm_arch_vcpu_ctxsync_fp(vcpu);
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 39f0e87a340e..8c5d7990e5b3 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -24,6 +24,9 @@ struct s1_walk_info {
unsigned int txsz;
int sl;
bool hpd;
+ bool e0poe;
+ bool poe;
+ bool pan;
bool be;
bool s2;
};
@@ -37,6 +40,16 @@ struct s1_walk_result {
u8 APTable;
bool UXNTable;
bool PXNTable;
+ bool uwxn;
+ bool uov;
+ bool ur;
+ bool uw;
+ bool ux;
+ bool pwxn;
+ bool pov;
+ bool pr;
+ bool pw;
+ bool px;
};
struct {
u8 fst;
@@ -87,6 +100,51 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
}
}
+static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ if (!kvm_has_s1pie(vcpu->kvm))
+ return false;
+
+ switch (regime) {
+ case TR_EL2:
+ case TR_EL20:
+ return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
+ case TR_EL10:
+ return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
+ (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
+ default:
+ BUG();
+ }
+}
+
+static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
+{
+ u64 val;
+
+ if (!kvm_has_s1poe(vcpu->kvm)) {
+ wi->poe = wi->e0poe = false;
+ return;
+ }
+
+ switch (wi->regime) {
+ case TR_EL2:
+ case TR_EL20:
+ val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
+ wi->poe = val & TCR2_EL2_POE;
+ wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
+ break;
+ case TR_EL10:
+ if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
+ wi->poe = wi->e0poe = false;
+ return;
+ }
+
+ val = __vcpu_sys_reg(vcpu, TCR2_EL1);
+ wi->poe = val & TCR2_EL1x_POE;
+ wi->e0poe = val & TCR2_EL1x_E0POE;
+ }
+}
+
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
@@ -98,6 +156,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
wi->regime = compute_translation_regime(vcpu, op);
as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
+ wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
+ (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
va55 = va & BIT(55);
@@ -180,6 +240,14 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
(va55 ?
FIELD_GET(TCR_HPD1, tcr) :
FIELD_GET(TCR_HPD0, tcr)));
+ /* R_JHSVW */
+ wi->hpd |= s1pie_enabled(vcpu, wi->regime);
+
+ /* Do we have POE? */
+ compute_s1poe(vcpu, wi);
+
+ /* R_BVXDG */
+ wi->hpd |= (wi->poe || wi->e0poe);
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55) {
@@ -412,6 +480,11 @@ struct mmu_config {
u64 ttbr1;
u64 tcr;
u64 mair;
+ u64 tcr2;
+ u64 pir;
+ u64 pire0;
+ u64 por_el0;
+ u64 por_el1;
u64 sctlr;
u64 vttbr;
u64 vtcr;
@@ -424,6 +497,17 @@ static void __mmu_config_save(struct mmu_config *config)
config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
config->tcr = read_sysreg_el1(SYS_TCR);
config->mair = read_sysreg_el1(SYS_MAIR);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
+ config->tcr2 = read_sysreg_el1(SYS_TCR2);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ config->pir = read_sysreg_el1(SYS_PIR);
+ config->pire0 = read_sysreg_el1(SYS_PIRE0);
+ }
+ if (system_supports_poe()) {
+ config->por_el1 = read_sysreg_el1(SYS_POR);
+ config->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ }
+ }
config->sctlr = read_sysreg_el1(SYS_SCTLR);
config->vttbr = read_sysreg(vttbr_el2);
config->vtcr = read_sysreg(vtcr_el2);
@@ -444,6 +528,17 @@ static void __mmu_config_restore(struct mmu_config *config)
write_sysreg_el1(config->ttbr1, SYS_TTBR1);
write_sysreg_el1(config->tcr, SYS_TCR);
write_sysreg_el1(config->mair, SYS_MAIR);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
+ write_sysreg_el1(config->tcr2, SYS_TCR2);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ write_sysreg_el1(config->pir, SYS_PIR);
+ write_sysreg_el1(config->pire0, SYS_PIRE0);
+ }
+ if (system_supports_poe()) {
+ write_sysreg_el1(config->por_el1, SYS_POR);
+ write_sysreg_s(config->por_el0, SYS_POR_EL0);
+ }
+ }
write_sysreg_el1(config->sctlr, SYS_SCTLR);
write_sysreg(config->vttbr, vttbr_el2);
write_sysreg(config->vtcr, vtcr_el2);
@@ -739,6 +834,9 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
return false;
+ if (s1pie_enabled(vcpu, regime))
+ return true;
+
if (regime == TR_EL10)
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
else
@@ -747,111 +845,343 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
return sctlr & SCTLR_EL1_EPAN;
}
-static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
{
- bool perm_fail, ur, uw, ux, pr, pw, px;
- struct s1_walk_result wr = {};
- struct s1_walk_info wi = {};
- int ret, idx;
+ bool wxn;
- ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
- if (ret)
- goto compute_par;
-
- if (wr.level == S1_MMU_DISABLED)
- goto compute_par;
-
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
- ret = walk_s1(vcpu, &wi, &wr, vaddr);
-
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
- if (ret)
- goto compute_par;
-
- /* FIXME: revisit when adding indirect permission support */
- /* AArch64.S1DirectBasePermissions() */
- if (wi.regime != TR_EL2) {
- switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
+ /* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
+ if (wi->regime != TR_EL2) {
+ switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
case 0b00:
- pr = pw = true;
- ur = uw = false;
+ wr->pr = wr->pw = true;
+ wr->ur = wr->uw = false;
break;
case 0b01:
- pr = pw = ur = uw = true;
+ wr->pr = wr->pw = wr->ur = wr->uw = true;
break;
case 0b10:
- pr = true;
- pw = ur = uw = false;
+ wr->pr = true;
+ wr->pw = wr->ur = wr->uw = false;
break;
case 0b11:
- pr = ur = true;
- pw = uw = false;
+ wr->pr = wr->ur = true;
+ wr->pw = wr->uw = false;
break;
}
- switch (wr.APTable) {
+ /* We don't use px for anything yet, but hey... */
+ wr->px = !((wr->desc & PTE_PXN) || wr->uw);
+ wr->ux = !(wr->desc & PTE_UXN);
+ } else {
+ wr->ur = wr->uw = wr->ux = false;
+
+ if (!(wr->desc & PTE_RDONLY)) {
+ wr->pr = wr->pw = true;
+ } else {
+ wr->pr = true;
+ wr->pw = false;
+ }
+
+ /* XN maps to UXN */
+ wr->px = !(wr->desc & PTE_UXN);
+ }
+
+ switch (wi->regime) {
+ case TR_EL2:
+ case TR_EL20:
+ wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
+ break;
+ case TR_EL10:
+ wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
+ break;
+ }
+
+ wr->pwxn = wr->uwxn = wxn;
+ wr->pov = wi->poe;
+ wr->uov = wi->e0poe;
+}
+
+static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ /* Hierarchical part of AArch64.S1DirectBasePermissions() */
+ if (wi->regime != TR_EL2) {
+ switch (wr->APTable) {
case 0b00:
break;
case 0b01:
- ur = uw = false;
+ wr->ur = wr->uw = false;
break;
case 0b10:
- pw = uw = false;
+ wr->pw = wr->uw = false;
break;
case 0b11:
- pw = ur = uw = false;
+ wr->pw = wr->ur = wr->uw = false;
break;
}
- /* We don't use px for anything yet, but hey... */
- px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
- ux = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ wr->px &= !wr->PXNTable;
+ wr->ux &= !wr->UXNTable;
+ } else {
+ if (wr->APTable & BIT(1))
+ wr->pw = false;
+
+ /* XN maps to UXN */
+ wr->px &= !wr->UXNTable;
+ }
+}
- if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
- bool pan;
+#define perm_idx(v, r, i) ((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
+
+#define set_priv_perms(wr, r, w, x) \
+ do { \
+ (wr)->pr = (r); \
+ (wr)->pw = (w); \
+ (wr)->px = (x); \
+ } while (0)
+
+#define set_unpriv_perms(wr, r, w, x) \
+ do { \
+ (wr)->ur = (r); \
+ (wr)->uw = (w); \
+ (wr)->ux = (x); \
+ } while (0)
+
+#define set_priv_wxn(wr, v) \
+ do { \
+ (wr)->pwxn = (v); \
+ } while (0)
+
+#define set_unpriv_wxn(wr, v) \
+ do { \
+ (wr)->uwxn = (v); \
+ } while (0)
+
+/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
+#define set_perms(w, wr, ip) \
+ do { \
+ /* R_LLZDZ */ \
+ switch ((ip)) { \
+ case 0b0000: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b0001: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b0010: \
+ set_ ## w ## _perms((wr), false, false, true ); \
+ break; \
+ case 0b0011: \
+ set_ ## w ## _perms((wr), true , false, true ); \
+ break; \
+ case 0b0100: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b0101: \
+ set_ ## w ## _perms((wr), true , true , false); \
+ break; \
+ case 0b0110: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b0111: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b1000: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b1001: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b1010: \
+ set_ ## w ## _perms((wr), true , false, true ); \
+ break; \
+ case 0b1011: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b1100: \
+ set_ ## w ## _perms((wr), true , true , false); \
+ break; \
+ case 0b1101: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b1110: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b1111: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ } \
+ \
+ /* R_HJYGR */ \
+ set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \
+ \
+ } while (0)
+
+static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ u8 up, pp, idx;
- pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
- pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
- pw &= !pan;
- pr &= !pan;
- }
- } else {
- ur = uw = ux = false;
+ idx = pte_pi_index(wr->desc);
- if (!(wr.desc & PTE_RDONLY)) {
- pr = pw = true;
- } else {
- pr = true;
- pw = false;
- }
+ switch (wi->regime) {
+ case TR_EL10:
+ pp = perm_idx(vcpu, PIR_EL1, idx);
+ up = perm_idx(vcpu, PIRE0_EL1, idx);
+ break;
+ case TR_EL20:
+ pp = perm_idx(vcpu, PIR_EL2, idx);
+ up = perm_idx(vcpu, PIRE0_EL2, idx);
+ break;
+ case TR_EL2:
+ pp = perm_idx(vcpu, PIR_EL2, idx);
+ up = 0;
+ break;
+ }
- if (wr.APTable & BIT(1))
- pw = false;
+ set_perms(priv, wr, pp);
- /* XN maps to UXN */
- px = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ if (wi->regime != TR_EL2)
+ set_perms(unpriv, wr, up);
+ else
+ set_unpriv_perms(wr, false, false, false);
+
+ wr->pov = wi->poe && !(pp & BIT(3));
+ wr->uov = wi->e0poe && !(up & BIT(3));
+
+ /* R_VFPJF */
+ if (wr->px && wr->uw) {
+ set_priv_perms(wr, false, false, false);
+ set_unpriv_perms(wr, false, false, false);
+ }
+}
+
+static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ u8 idx, pov_perms, uov_perms;
+
+ idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
+
+ switch (wi->regime) {
+ case TR_EL10:
+ pov_perms = perm_idx(vcpu, POR_EL1, idx);
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL20:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL2:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ uov_perms = 0;
+ break;
+ }
+
+ if (pov_perms & ~POE_RXW)
+ pov_perms = POE_NONE;
+
+ if (wi->poe && wr->pov) {
+ wr->pr &= pov_perms & POE_R;
+ wr->px &= pov_perms & POE_X;
+ wr->pw &= pov_perms & POE_W;
+ }
+
+ if (uov_perms & ~POE_RXW)
+ uov_perms = POE_NONE;
+
+ if (wi->e0poe && wr->uov) {
+ wr->ur &= uov_perms & POE_R;
+ wr->ux &= uov_perms & POE_X;
+ wr->uw &= uov_perms & POE_W;
}
+}
+
+static void compute_s1_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ bool pan;
+
+ if (!s1pie_enabled(vcpu, wi->regime))
+ compute_s1_direct_permissions(vcpu, wi, wr);
+ else
+ compute_s1_indirect_permissions(vcpu, wi, wr);
+
+ if (!wi->hpd)
+ compute_s1_hierarchical_permissions(vcpu, wi, wr);
+
+ if (wi->poe || wi->e0poe)
+ compute_s1_overlay_permissions(vcpu, wi, wr);
+
+ /* R_QXXPC */
+ if (wr->pwxn) {
+ if (!wr->pov && wr->pw)
+ wr->px = false;
+ if (wr->pov && wr->px)
+ wr->pw = false;
+ }
+
+ /* R_NPBXC */
+ if (wr->uwxn) {
+ if (!wr->uov && wr->uw)
+ wr->ux = false;
+ if (wr->uov && wr->ux)
+ wr->uw = false;
+ }
+
+ pan = wi->pan && (wr->ur || wr->uw ||
+ (pan3_enabled(vcpu, wi->regime) && wr->ux));
+ wr->pw &= !pan;
+ wr->pr &= !pan;
+}
- perm_fail = false;
+static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct s1_walk_result wr = {};
+ struct s1_walk_info wi = {};
+ bool perm_fail = false;
+ int ret, idx;
+
+ ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
+ if (ret)
+ goto compute_par;
+
+ if (wr.level == S1_MMU_DISABLED)
+ goto compute_par;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = walk_s1(vcpu, &wi, &wr, vaddr);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (ret)
+ goto compute_par;
+
+ compute_s1_permissions(vcpu, &wi, &wr);
switch (op) {
case OP_AT_S1E1RP:
case OP_AT_S1E1R:
case OP_AT_S1E2R:
- perm_fail = !pr;
+ perm_fail = !wr.pr;
break;
case OP_AT_S1E1WP:
case OP_AT_S1E1W:
case OP_AT_S1E2W:
- perm_fail = !pw;
+ perm_fail = !wr.pw;
break;
case OP_AT_S1E0R:
- perm_fail = !ur;
+ perm_fail = !wr.ur;
break;
case OP_AT_S1E0W:
- perm_fail = !uw;
+ perm_fail = !wr.uw;
break;
case OP_AT_S1E1A:
case OP_AT_S1E2A:
@@ -914,6 +1244,17 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
+ if (kvm_has_tcr2(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
+ if (kvm_has_s1pie(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
+ }
+ if (kvm_has_s1poe(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
+ write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
+ }
+ }
write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
__load_stage2(mmu, mmu->arch);
@@ -992,12 +1333,9 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
* switching context behind everybody's back, disable interrupts...
*/
scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
- struct kvm_s2_mmu *mmu;
u64 val, hcr;
bool fail;
- mmu = &vcpu->kvm->arch.mmu;
-
val = hcr = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
val |= HCR_VM;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 05b6435d02a9..1ffbfd1c3cf2 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -16,9 +16,13 @@
enum trap_behaviour {
BEHAVE_HANDLE_LOCALLY = 0,
+
BEHAVE_FORWARD_READ = BIT(0),
BEHAVE_FORWARD_WRITE = BIT(1),
- BEHAVE_FORWARD_ANY = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+ BEHAVE_FORWARD_RW = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+
+ /* Traps that take effect in Host EL0, this is rare! */
+ BEHAVE_FORWARD_IN_HOST_EL0 = BIT(2),
};
struct trap_bits {
@@ -79,7 +83,6 @@ enum cgt_group_id {
CGT_MDCR_E2TB,
CGT_MDCR_TDCC,
- CGT_CPACR_E0POE,
CGT_CPTR_TAM,
CGT_CPTR_TCPAC,
@@ -106,6 +109,7 @@ enum cgt_group_id {
CGT_HCR_TPU_TOCU,
CGT_HCR_NV1_nNV2_ENSCXT,
CGT_MDCR_TPM_TPMCR,
+ CGT_MDCR_TPM_HPMN,
CGT_MDCR_TDE_TDA,
CGT_MDCR_TDE_TDOSA,
CGT_MDCR_TDE_TDRA,
@@ -122,6 +126,7 @@ enum cgt_group_id {
CGT_CNTHCTL_EL1PTEN,
CGT_CPTR_TTA,
+ CGT_MDCR_HPMN,
/* Must be last */
__NR_CGT_GROUP_IDS__
@@ -138,7 +143,7 @@ static const struct trap_bits coarse_trap_bits[] = {
.index = HCR_EL2,
.value = HCR_TID2,
.mask = HCR_TID2,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TID3] = {
.index = HCR_EL2,
@@ -162,37 +167,37 @@ static const struct trap_bits coarse_trap_bits[] = {
.index = HCR_EL2,
.value = HCR_TIDCP,
.mask = HCR_TIDCP,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TACR] = {
.index = HCR_EL2,
.value = HCR_TACR,
.mask = HCR_TACR,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TSW] = {
.index = HCR_EL2,
.value = HCR_TSW,
.mask = HCR_TSW,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
.index = HCR_EL2,
.value = HCR_TPC,
.mask = HCR_TPC,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TPU] = {
.index = HCR_EL2,
.value = HCR_TPU,
.mask = HCR_TPU,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TTLB] = {
.index = HCR_EL2,
.value = HCR_TTLB,
.mask = HCR_TTLB,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TVM] = {
.index = HCR_EL2,
@@ -204,7 +209,7 @@ static const struct trap_bits coarse_trap_bits[] = {
.index = HCR_EL2,
.value = HCR_TDZ,
.mask = HCR_TDZ,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TRVM] = {
.index = HCR_EL2,
@@ -216,205 +221,201 @@ static const struct trap_bits coarse_trap_bits[] = {
.index = HCR_EL2,
.value = HCR_TLOR,
.mask = HCR_TLOR,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TERR] = {
.index = HCR_EL2,
.value = HCR_TERR,
.mask = HCR_TERR,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_APK] = {
.index = HCR_EL2,
.value = 0,
.mask = HCR_APK,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_NV] = {
.index = HCR_EL2,
.value = HCR_NV,
.mask = HCR_NV,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_NV_nNV2] = {
.index = HCR_EL2,
.value = HCR_NV,
.mask = HCR_NV | HCR_NV2,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_NV1_nNV2] = {
.index = HCR_EL2,
.value = HCR_NV | HCR_NV1,
.mask = HCR_NV | HCR_NV1 | HCR_NV2,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_AT] = {
.index = HCR_EL2,
.value = HCR_AT,
.mask = HCR_AT,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_nFIEN] = {
.index = HCR_EL2,
.value = 0,
.mask = HCR_FIEN,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TID4] = {
.index = HCR_EL2,
.value = HCR_TID4,
.mask = HCR_TID4,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TICAB] = {
.index = HCR_EL2,
.value = HCR_TICAB,
.mask = HCR_TICAB,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TOCU] = {
.index = HCR_EL2,
.value = HCR_TOCU,
.mask = HCR_TOCU,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_ENSCXT] = {
.index = HCR_EL2,
.value = 0,
.mask = HCR_ENSCXT,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TTLBIS] = {
.index = HCR_EL2,
.value = HCR_TTLBIS,
.mask = HCR_TTLBIS,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCR_TTLBOS] = {
.index = HCR_EL2,
.value = HCR_TTLBOS,
.mask = HCR_TTLBOS,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TPMCR] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TPMCR,
.mask = MDCR_EL2_TPMCR,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW |
+ BEHAVE_FORWARD_IN_HOST_EL0,
},
[CGT_MDCR_TPM] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TPM,
.mask = MDCR_EL2_TPM,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW |
+ BEHAVE_FORWARD_IN_HOST_EL0,
},
[CGT_MDCR_TDE] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TDE,
.mask = MDCR_EL2_TDE,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TDA] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TDA,
.mask = MDCR_EL2_TDA,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TDOSA] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TDOSA,
.mask = MDCR_EL2_TDOSA,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TDRA] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TDRA,
.mask = MDCR_EL2_TDRA,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_E2PB] = {
.index = MDCR_EL2,
.value = 0,
.mask = BIT(MDCR_EL2_E2PB_SHIFT),
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TPMS] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TPMS,
.mask = MDCR_EL2_TPMS,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TTRF] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TTRF,
.mask = MDCR_EL2_TTRF,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_E2TB] = {
.index = MDCR_EL2,
.value = 0,
.mask = BIT(MDCR_EL2_E2TB_SHIFT),
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_MDCR_TDCC] = {
.index = MDCR_EL2,
.value = MDCR_EL2_TDCC,
.mask = MDCR_EL2_TDCC,
- .behaviour = BEHAVE_FORWARD_ANY,
- },
- [CGT_CPACR_E0POE] = {
- .index = CPTR_EL2,
- .value = CPACR_ELx_E0POE,
- .mask = CPACR_ELx_E0POE,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_CPTR_TAM] = {
.index = CPTR_EL2,
.value = CPTR_EL2_TAM,
.mask = CPTR_EL2_TAM,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_CPTR_TCPAC] = {
.index = CPTR_EL2,
.value = CPTR_EL2_TCPAC,
.mask = CPTR_EL2_TCPAC,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCRX_EnFPM] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_EnFPM,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_HCRX_TCR2En] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_TCR2En,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TC] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TC,
.mask = ICH_HCR_TC,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TALL0] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TALL0,
.mask = ICH_HCR_TALL0,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TALL1] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TALL1,
.mask = ICH_HCR_TALL1,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
[CGT_ICH_HCR_TDIR] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TDIR,
.mask = ICH_HCR_TDIR,
- .behaviour = BEHAVE_FORWARD_ANY,
+ .behaviour = BEHAVE_FORWARD_RW,
},
};
@@ -435,6 +436,7 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU),
MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
MCB(CGT_MDCR_TPM_TPMCR, CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+ MCB(CGT_MDCR_TPM_HPMN, CGT_MDCR_TPM, CGT_MDCR_HPMN),
MCB(CGT_MDCR_TDE_TDA, CGT_MDCR_TDE, CGT_MDCR_TDA),
MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA),
MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA),
@@ -474,7 +476,7 @@ static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)
if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))
return BEHAVE_HANDLE_LOCALLY;
- return BEHAVE_FORWARD_ANY;
+ return BEHAVE_FORWARD_RW;
}
static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
@@ -482,7 +484,7 @@ static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))
return BEHAVE_HANDLE_LOCALLY;
- return BEHAVE_FORWARD_ANY;
+ return BEHAVE_FORWARD_RW;
}
static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu)
@@ -493,7 +495,35 @@ static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu)
val = translate_cptr_el2_to_cpacr_el1(val);
if (val & CPACR_ELx_TTA)
- return BEHAVE_FORWARD_ANY;
+ return BEHAVE_FORWARD_RW;
+
+ return BEHAVE_HANDLE_LOCALLY;
+}
+
+static enum trap_behaviour check_mdcr_hpmn(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ unsigned int idx;
+
+
+ switch (sysreg) {
+ case SYS_PMEVTYPERn_EL0(0) ... SYS_PMEVTYPERn_EL0(30):
+ case SYS_PMEVCNTRn_EL0(0) ... SYS_PMEVCNTRn_EL0(30):
+ idx = (sys_reg_CRm(sysreg) & 0x3) << 3 | sys_reg_Op2(sysreg);
+ break;
+ case SYS_PMXEVTYPER_EL0:
+ case SYS_PMXEVCNTR_EL0:
+ idx = SYS_FIELD_GET(PMSELR_EL0, SEL,
+ __vcpu_sys_reg(vcpu, PMSELR_EL0));
+ break;
+ default:
+ /* Someone used this trap helper for something else... */
+ KVM_BUG_ON(1, vcpu->kvm);
+ return BEHAVE_HANDLE_LOCALLY;
+ }
+
+ if (kvm_pmu_counter_is_hyp(vcpu, idx))
+ return BEHAVE_FORWARD_RW | BEHAVE_FORWARD_IN_HOST_EL0;
return BEHAVE_HANDLE_LOCALLY;
}
@@ -505,6 +535,7 @@ static const complex_condition_check ccc[] = {
CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
CCC(CGT_CPTR_TTA, check_cptr_tta),
+ CCC(CGT_MDCR_HPMN, check_mdcr_hpmn),
};
/*
@@ -711,6 +742,10 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_PIR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_PIRE0_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_POR_EL0, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_POR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TCR2_EL1, CGT_HCR_TVM_TRVM_HCRX_TCR2En),
SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ),
SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ),
@@ -919,77 +954,77 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_PMOVSCLR_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_PMCEID0_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_PMCEID1_EL0, CGT_MDCR_TPM),
- SR_TRAP(SYS_PMXEVTYPER_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMXEVTYPER_EL0, CGT_MDCR_TPM_HPMN),
SR_TRAP(SYS_PMSWINC_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_PMSELR_EL0, CGT_MDCR_TPM),
- SR_TRAP(SYS_PMXEVCNTR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMXEVCNTR_EL0, CGT_MDCR_TPM_HPMN),
SR_TRAP(SYS_PMCCNTR_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_PMUSERENR_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_PMINTENSET_EL1, CGT_MDCR_TPM),
SR_TRAP(SYS_PMINTENCLR_EL1, CGT_MDCR_TPM),
SR_TRAP(SYS_PMMIR_EL1, CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(0), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(1), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(2), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(3), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(4), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(5), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(6), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(7), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(8), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(9), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(10), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(11), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(12), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(13), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(14), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(15), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(16), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(17), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(18), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(19), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(20), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(21), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(22), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(23), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(24), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(25), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(26), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(27), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(28), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(29), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVCNTRn_EL0(30), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(0), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(1), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(2), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(3), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(4), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(5), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(6), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(7), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(8), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(9), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM),
- SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(0), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(1), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(2), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(3), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(4), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(5), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(6), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(7), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(8), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(9), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(10), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(11), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(12), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(13), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(14), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(15), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(16), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(17), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(18), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(19), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(20), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(21), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(22), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(23), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(24), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(25), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(26), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(27), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(28), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(29), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(30), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(0), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(1), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(2), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(3), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(4), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(5), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(6), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(7), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(8), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(9), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM_HPMN),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM_HPMN),
SR_TRAP(SYS_PMCCFILTR_EL0, CGT_MDCR_TPM),
SR_TRAP(SYS_MDCCSR_EL0, CGT_MDCR_TDCC_TDE_TDA),
SR_TRAP(SYS_MDCCINT_EL1, CGT_MDCR_TDCC_TDE_TDA),
@@ -1141,7 +1176,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_AMEVTYPER1_EL0(13), CGT_CPTR_TAM),
SR_TRAP(SYS_AMEVTYPER1_EL0(14), CGT_CPTR_TAM),
SR_TRAP(SYS_AMEVTYPER1_EL0(15), CGT_CPTR_TAM),
- SR_TRAP(SYS_POR_EL0, CGT_CPACR_E0POE),
/* op0=2, op1=1, and CRn<0b1000 */
SR_RANGE_TRAP(sys_reg(2, 1, 0, 0, 0),
sys_reg(2, 1, 7, 15, 7), CGT_CPTR_TTA),
@@ -2021,7 +2055,8 @@ check_mcb:
cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
for (int i = 0; cgids[i] != __RESERVED__; i++) {
- if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+ if (cgids[i] >= __MULTIPLE_CONTROL_BITS__ &&
+ cgids[i] < __COMPLEX_CONDITIONS__) {
kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
ret = -EINVAL;
}
@@ -2126,11 +2161,19 @@ static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr)
return masks->mask[sr - __VNCR_START__].res0;
}
-static bool check_fgt_bit(struct kvm *kvm, bool is_read,
+static bool check_fgt_bit(struct kvm_vcpu *vcpu, bool is_read,
u64 val, const union trap_config tc)
{
+ struct kvm *kvm = vcpu->kvm;
enum vcpu_sysreg sr;
+ /*
+ * KVM doesn't know about any FGTs that apply to the host, and hopefully
+ * that'll remain the case.
+ */
+ if (is_hyp_ctxt(vcpu))
+ return false;
+
if (tc.pol)
return (val & BIT(tc.bit));
@@ -2207,7 +2250,15 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
* If we're not nesting, immediately return to the caller, with the
* sysreg index, should we have it.
*/
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ if (!vcpu_has_nv(vcpu))
+ goto local;
+
+ /*
+ * There are a few traps that take effect InHost, but are constrained
+ * to EL0. Don't bother with computing the trap behaviour if the vCPU
+ * isn't in EL0.
+ */
+ if (is_hyp_ctxt(vcpu) && !vcpu_is_host_el0(vcpu))
goto local;
switch ((enum fgt_group_id)tc.fgt) {
@@ -2253,12 +2304,14 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
goto local;
}
- if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu->kvm, is_read,
- val, tc))
+ if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu, is_read, val, tc))
goto inject;
b = compute_trap_behaviour(vcpu, tc);
+ if (!(b & BEHAVE_FORWARD_IN_HOST_EL0) && vcpu_is_host_el0(vcpu))
+ goto local;
+
if (((b & BEHAVE_FORWARD_READ) && is_read) ||
((b & BEHAVE_FORWARD_WRITE) && !is_read))
goto inject;
@@ -2393,6 +2446,8 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
+
+ kvm_pmu_nested_transition(vcpu);
}
static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
@@ -2475,6 +2530,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
+ kvm_pmu_nested_transition(vcpu);
+
return 1;
}
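One detail worth spelling out from check_mdcr_hpmn() above: the PMEVCNTR<n>_EL0 / PMEVTYPER<n>_EL0 encodings carry n as CRm[1:0]:Op2, which is what the shift-and-or expression recovers before asking kvm_pmu_counter_is_hyp() whether the counter is one the guest hypervisor has reserved via MDCR_EL2.HPMN. A trivial standalone restatement (assumed parameter names, not kernel code):

/*
 * Sketch: recover the event-counter number n from the CRm and Op2 fields
 * of a trapped PMEVCNTR<n>_EL0 / PMEVTYPER<n>_EL0 access. CRm[1:0] holds
 * bits [4:3] of n, Op2 holds bits [2:0], so n ranges over 0..30.
 */
static unsigned int pmev_counter_index(unsigned int crm, unsigned int op2)
{
	return ((crm & 0x3) << 3) | (op2 & 0x7);
}

For example, PMEVCNTR27_EL0 is encoded with CRm = 0b1011 and Op2 = 0b011, giving (0b11 << 3) | 0b011 = 27.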
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 5310fe1da616..34f53707892d 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -204,6 +204,35 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
__deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2);
}
+static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu)
+{
+ u64 r = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1;
+
+ if (!system_supports_mpam())
+ return;
+
+ /* trap guest access to MPAMIDR_EL1 */
+ if (system_supports_mpam_hcr()) {
+ write_sysreg_s(MPAMHCR_EL2_TRAP_MPAMIDR_EL1, SYS_MPAMHCR_EL2);
+ } else {
+ /* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */
+ r |= MPAM2_EL2_TIDR;
+ }
+
+ write_sysreg_s(r, SYS_MPAM2_EL2);
+}
+
+static inline void __deactivate_traps_mpam(void)
+{
+ if (!system_supports_mpam())
+ return;
+
+ write_sysreg_s(0, SYS_MPAM2_EL2);
+
+ if (system_supports_mpam_hcr())
+ write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2);
+}
+
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
@@ -244,6 +273,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
}
__activate_traps_hfgxtr(vcpu);
+ __activate_traps_mpam(vcpu);
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -263,6 +293,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
__deactivate_traps_hfgxtr(vcpu);
+ __deactivate_traps_mpam();
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 1579a3c08a36..a651c43ad679 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -58,7 +58,7 @@ static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP);
+ return kvm_has_s1pie(kern_hyp_va(vcpu->kvm));
}
static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
@@ -69,7 +69,7 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP);
+ return kvm_has_tcr2(kern_hyp_va(vcpu->kvm));
}
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
@@ -80,7 +80,7 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP);
+ return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
@@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
}
-static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
+ u64 mpidr)
{
- write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(mpidr, vmpidr_el2);
if (has_vhe() ||
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 45a84f0ade04..1e6d995968a1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -15,6 +15,4 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
-void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
-
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index fefc89209f9e..6aa0b13d86e5 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -105,8 +105,10 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
- hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
+ hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
+ (HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
@@ -349,13 +351,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
-static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
-{
- DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
-
- __pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
-}
-
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
@@ -411,7 +406,6 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
- HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 077d4098548d..01616c39a810 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -6,6 +6,9 @@
#include <linux/kvm_host.h>
#include <linux/mm.h>
+
+#include <asm/kvm_emulate.h>
+
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
@@ -201,11 +204,46 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
}
}
+static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+
+ if (has_hvhe())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
+
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (vcpu_has_ptrauth(vcpu))
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+}
+
/*
* Initialize trap register values in protected mode.
*/
-void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
+ vcpu->arch.mdcr_el2 = 0;
+
+ pkvm_vcpu_reset_hcr(vcpu);
+
+ if ((!vcpu_is_protected(vcpu)))
+ return;
+
pvm_init_trap_regs(vcpu);
pvm_init_traps_aa64pfr0(vcpu);
pvm_init_traps_aa64pfr1(vcpu);
@@ -289,6 +327,65 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_spin_unlock(&vm_table_lock);
}
+static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
+{
+ struct kvm *kvm = &hyp_vm->kvm;
+ DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
+
+ /* No restrictions for non-protected VMs. */
+ if (!kvm_vm_is_protected(kvm)) {
+ bitmap_copy(kvm->arch.vcpu_features,
+ host_kvm->arch.vcpu_features,
+ KVM_VCPU_MAX_FEATURES);
+ return;
+ }
+
+ bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
+
+ /*
+ * For protected VMs, always allow:
+ * - CPU starting in poweroff state
+ * - PSCI v0.2
+ */
+ set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
+ set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
+
+ /*
+ * Check if remaining features are allowed:
+ * - Performance Monitoring
+ * - Scalable Vectors
+ * - Pointer Authentication
+ */
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW))
+ set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
+
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW))
+ set_bit(KVM_ARM_VCPU_SVE, allowed_features);
+
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) &&
+ FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED))
+ set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
+
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
+ FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW))
+ set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
+
+ bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
+ allowed_features, KVM_VCPU_MAX_FEATURES);
+}
+
+static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) {
+ kvm_vcpu_enable_ptrauth(vcpu);
+ } else {
+ vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH);
+ }
+}
+
static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
if (host_vcpu)
@@ -310,6 +407,18 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
+ hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
+ pkvm_init_features_from_host(hyp_vm, host_kvm);
+}
+
+static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
+{
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+
+ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
+ vcpu_clear_flag(vcpu, GUEST_HAS_SVE);
+ vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
+ }
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -335,6 +444,11 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
+ hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+
+ pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
+ pkvm_vcpu_init_ptrauth(hyp_vcpu);
+ pkvm_vcpu_init_traps(&hyp_vcpu->vcpu);
done:
if (ret)
unpin_host_vcpu(host_vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index dfe8fe0f7eaf..9c2ce1e0e99a 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -265,6 +265,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_
case PSCI_1_0_FN_PSCI_FEATURES:
case PSCI_1_0_FN_SET_SUSPEND_MODE:
case PSCI_1_1_FN64_SYSTEM_RESET2:
+ case PSCI_1_3_FN_SYSTEM_OFF2:
+ case PSCI_1_3_FN64_SYSTEM_OFF2:
return psci_forward(host_ctxt);
case PSCI_1_0_FN64_SYSTEM_SUSPEND:
return psci_system_suspend(func_id, host_ctxt);
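The psci-relay.c hunk above only forwards the two new SYSTEM_OFF2 function IDs from the host through to the firmware, just as it already does for SYSTEM_RESET2. For context, here is a hedged sketch of how a PSCIv1.3 client might issue the hibernation request itself; it is not the client-driver code from this merge, the off_type value 0x1 (HIBERNATE_OFF) is taken from the PSCI specification, and the generic SMCCC conduit helper is assumed to be available:

#include <linux/arm-smccc.h>
#include <uapi/linux/psci.h>

/*
 * Sketch only: request a hibernate-style power-off via PSCIv1.3
 * SYSTEM_OFF2. On success the call does not return; if it does return,
 * a0 carries a (negative) PSCI error code such as NOT_SUPPORTED.
 */
static int psci_hibernate_off(void)
{
	struct arm_smccc_res res;

	/* arg1 = off_type (0x1 = HIBERNATE_OFF), arg2 = cookie (reserved, 0) */
	arm_smccc_1_1_invoke(PSCI_1_3_FN64_SYSTEM_OFF2, 0x1, 0, &res);

	return (int)res.a0;
}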
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 174007f3fadd..cbdd18cd3f98 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -95,7 +95,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
{
void *start, *end, *virt = hyp_phys_to_virt(phys);
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
- enum kvm_pgtable_prot prot;
int ret, i;
/* Recreate the hyp page-table using the early page allocator */
@@ -147,24 +146,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
- pkvm_create_host_sve_mappings();
-
- /*
- * Map the host sections RO in the hypervisor, but transfer the
- * ownership from the host to the hypervisor itself to make sure they
- * can't be donated or shared with another entity.
- *
- * The ownership transition requires matching changes in the host
- * stage-2. This will be done later (see finalize_host_mappings()) once
- * the hyp_vmemmap is addressable.
- */
- prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
- ret = pkvm_create_mappings(&kvm_vgic_global_state,
- &kvm_vgic_global_state + 1, prot);
- if (ret)
- return ret;
-
- return 0;
+ return pkvm_create_host_sve_mappings();
}
static void update_nvhe_init_params(void)
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index 29305022bc04..dba101565de3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
__sysreg_restore_common_state(ctxt);
__sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 18d4677002b1..3f9741e51d41 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -1012,9 +1012,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
- /* SEIS */
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
- val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index e12bd7d6d2dc..5f78a39053a7 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -15,6 +15,131 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>
+static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
+{
+ /* These registers are common with EL1 */
+ __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1);
+ __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR);
+ __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0);
+ __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1);
+ __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR);
+ __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR);
+ __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR);
+ __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
+ __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR);
+
+ /*
+ * In VHE mode those registers are compatible between EL1 and EL2,
+ * and the guest uses the _EL1 versions on the CPU naturally.
+ * So we save them into their _EL2 versions here.
+ * For nVHE mode we trap accesses to those registers, so our
+ * _EL2 copy in sys_regs[] is always up-to-date and we don't need
+ * to save anything here.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ u64 val;
+
+ /*
+ * We don't save CPTR_EL2, as accesses to CPACR_EL1
+ * are always trapped, ensuring that the in-memory
+ * copy is always up-to-date. A small blessing...
+ */
+ __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR);
+ __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0);
+ __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1);
+ __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR);
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0);
+ __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR);
+ }
+
+ /*
+ * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
+ * the interesting CNTHCTL_EL2 bits live. So preserve these
+ * bits when reading back the guest-visible value.
+ */
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+ }
+
+ __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1);
+ __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR);
+ __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR);
+}
+
+static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ /* These registers are common with EL1 */
+ write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1);
+ write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR);
+
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ /*
+ * In VHE mode those registers are compatible between
+ * EL1 and EL2.
+ */
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
+ } else {
+ /*
+ * CNTHCTL_EL2 only affects EL1 when running nVHE, so
+ * no need to restore it.
+ */
+ val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2));
+ write_sysreg_el1(val, SYS_SCTLR);
+ val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2));
+ write_sysreg_el1(val, SYS_CPACR);
+ val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2));
+ write_sysreg_el1(val, SYS_TTBR0);
+ val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2));
+ write_sysreg_el1(val, SYS_TCR);
+ }
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR2_EL2), SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIR_EL2), SYS_PIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIRE0_EL2), SYS_PIRE0);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, POR_EL2), SYS_POR);
+ }
+
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2), SYS_FAR);
+ write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+}
+
/*
* VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
* pstate, which are handled as part of the el2 return state) on every
@@ -66,6 +191,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
+ u64 mpidr;
host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
@@ -89,7 +215,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
+
+ if (unlikely(__is_hyp_ctxt(guest_ctxt))) {
+ __sysreg_restore_vel2_state(vcpu);
+ } else {
+ if (vcpu_has_nv(vcpu)) {
+ /*
+ * Use the guest hypervisor's VPIDR_EL2 when in a
+ * nested state. The hardware value of MIDR_EL1 gets
+ * restored on put.
+ */
+ write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
+
+ /*
+ * As we're restoring a nested guest, set the value
+ * provided by the guest hypervisor.
+ */
+ mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
+ } else {
+ mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
+ }
+
+ __sysreg_restore_el1_state(guest_ctxt, mpidr);
+ }
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
}
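
Outside of a hyp context, the load path above has to pick which MPIDR value the guest observes: the guest hypervisor's VMPIDR_EL2 when nested virt is in play, otherwise the vCPU's own MPIDR_EL1 copy. A hedged, standalone sketch of just that selection; the struct layout is illustrative and not KVM's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative fields only; this is not KVM's vCPU layout. */
struct demo_vcpu {
	bool has_nv;		/* nested virt enabled for this vCPU? */
	uint64_t vmpidr_el2;	/* value programmed by the guest hypervisor */
	uint64_t mpidr_el1;	/* the vCPU's own MPIDR_EL1 copy */
};

/* MPIDR view a non-hyp guest context should see while loaded. */
static uint64_t select_mpidr(const struct demo_vcpu *vcpu)
{
	return vcpu->has_nv ? vcpu->vmpidr_el2 : vcpu->mpidr_el1;
}

int main(void)
{
	struct demo_vcpu v = {
		.has_nv = true, .vmpidr_el2 = 0x81000003, .mpidr_el1 = 0x80000001,
	};

	printf("%#llx\n", (unsigned long long)select_mpidr(&v));
	return 0;
}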
@@ -112,12 +260,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
host_ctxt = host_data_ptr(host_ctxt);
- __sysreg_save_el1_state(guest_ctxt);
+ if (unlikely(__is_hyp_ctxt(guest_ctxt)))
+ __sysreg_save_vel2_state(vcpu);
+ else
+ __sysreg_save_el1_state(guest_ctxt);
+
__sysreg_save_user_state(guest_ctxt);
__sysreg32_save_state(vcpu);
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
+ /* If leaving a nesting guest, restore MIDR_EL1 default view */
+ if (vcpu_has_nv(vcpu))
+ write_sysreg(read_cpuid_id(), vpidr_el2);
+
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index ee6573befb81..27ce4cb44904 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -575,6 +575,8 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_ARM_PSCI_0_2:
case KVM_ARM_PSCI_1_0:
case KVM_ARM_PSCI_1_1:
+ case KVM_ARM_PSCI_1_2:
+ case KVM_ARM_PSCI_1_3:
if (!wants_02)
return -EINVAL;
vcpu->kvm->arch.psci_version = val;
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index cd6b7b83e2c3..ab365e839874 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -72,6 +72,31 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
return data;
}
+static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
+ return false;
+
+ if (vcpu_el1_is_32bit(vcpu)) {
+ switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+ case unpack_vcpu_flag(EXCEPT_AA32_UND):
+ case unpack_vcpu_flag(EXCEPT_AA32_IABT):
+ case unpack_vcpu_flag(EXCEPT_AA32_DABT):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+ return true;
+ default:
+ return false;
+ }
+ }
+}
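
kvm_pending_sync_exception() above only reports the exception classes that make completing the MMIO access pointless (undefined instruction or instruction/data aborts for AArch32, EL1/EL2 sync exceptions for AArch64). A standalone sketch of the same decision, combined with the mmio_needed check from the hunk below; the enum values are illustrative, KVM encodes these in vCPU flags.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative exception classes, not KVM's flag encodings. */
enum demo_except {
	DEMO_AA32_UND, DEMO_AA32_IABT, DEMO_AA32_DABT, DEMO_AA32_IRQ,
	DEMO_AA64_EL1_SYNC, DEMO_AA64_EL2_SYNC, DEMO_AA64_EL1_IRQ,
};

static bool pending_sync_exception(bool pending, bool is_aa32, enum demo_except ex)
{
	if (!pending)
		return false;

	if (is_aa32)
		return ex == DEMO_AA32_UND || ex == DEMO_AA32_IABT ||
		       ex == DEMO_AA32_DABT;

	return ex == DEMO_AA64_EL1_SYNC || ex == DEMO_AA64_EL2_SYNC;
}

/* MMIO completion is skipped if nothing is needed or an abort is queued. */
static bool skip_mmio_return(bool mmio_needed, bool pending, bool is_aa32,
			     enum demo_except ex)
{
	return !mmio_needed || pending_sync_exception(pending, is_aa32, ex);
}

int main(void)
{
	/* A queued 64-bit EL1 sync abort makes the MMIO return a no-op. */
	printf("%d\n", skip_mmio_return(true, true, false, DEMO_AA64_EL1_SYNC));
	return 0;
}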
+
/**
* kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
* or in-kernel IO emulation
@@ -84,8 +109,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
unsigned int len;
int mask;
- /* Detect an already handled MMIO return */
- if (unlikely(!vcpu->mmio_needed))
+ /*
+ * Detect if the MMIO return was already handled or if userspace aborted
+ * the MMIO access.
+ */
+ if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu)))
return 1;
vcpu->mmio_needed = 0;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index c4b17d90fc49..aeaa6017ffd8 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -917,12 +917,13 @@ static void limit_nv_id_regs(struct kvm *kvm)
ID_AA64MMFR4_EL1_E2H0_NI_NV1);
kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);
- /* Only limited support for PMU, Debug, BPs and WPs */
+ /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
val &= (NV_FTR(DFR0, PMUVer) |
NV_FTR(DFR0, WRPs) |
NV_FTR(DFR0, BRPs) |
- NV_FTR(DFR0, DebugVer));
+ NV_FTR(DFR0, DebugVer) |
+ NV_FTR(DFR0, HPMN0));
/* Cap Debug to ARMv8.1 */
tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
@@ -933,15 +934,15 @@ static void limit_nv_id_regs(struct kvm *kvm)
kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);
}
-u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
+u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg sr, u64 v)
{
- u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr);
struct kvm_sysreg_masks *masks;
masks = vcpu->kvm->arch.sysreg_masks;
if (masks) {
- sr -= __VNCR_START__;
+ sr -= __SANITISED_REG_START__;
v &= ~masks->mask[sr].res0;
v |= masks->mask[sr].res1;
@@ -952,7 +953,11 @@ u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
{
- int i = sr - __VNCR_START__;
+ int i = sr - __SANITISED_REG_START__;
+
+ BUILD_BUG_ON(!__builtin_constant_p(sr));
+ BUILD_BUG_ON(sr < __SANITISED_REG_START__);
+ BUILD_BUG_ON(sr >= NR_SYS_REGS);
kvm->arch.sysreg_masks->mask[i].res0 = res0;
kvm->arch.sysreg_masks->mask[i].res1 = res1;
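
The masks recorded by set_sysreg_masks() are consumed by kvm_vcpu_apply_reg_masks() above: RES0 bits are cleared and RES1 bits are forced before the value is handed to the guest. A minimal standalone sketch of that sanitisation step, with made-up mask values:

#include <stdint.h>
#include <stdio.h>

struct reg_mask {
	uint64_t res0;	/* bits the guest must see as 0 */
	uint64_t res1;	/* bits the guest must see as 1 */
};

static uint64_t apply_reg_masks(uint64_t v, const struct reg_mask *m)
{
	v &= ~m->res0;
	v |= m->res1;
	return v;
}

int main(void)
{
	struct reg_mask m = { .res0 = 0xff00, .res1 = 0x0003 };

	/* 0x12f4 -> clear bits 8-15, force bits 0-1 set. */
	printf("%#llx\n", (unsigned long long)apply_reg_masks(0x12f4, &m));
	return 0;
}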
@@ -1050,7 +1055,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= HCRX_EL2_PTTWI;
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP))
res0 |= HCRX_EL2_SCTLR2En;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ if (!kvm_has_tcr2(kvm))
res0 |= HCRX_EL2_TCR2En;
if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
@@ -1101,9 +1106,9 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0);
if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
res0 |= HFGxTR_EL2_nRCWMASK_EL1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+ if (!kvm_has_s1pie(kvm))
res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (!kvm_has_s1poe(kvm))
res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1);
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
res0 |= HFGxTR_EL2_nS2POR_EL1;
@@ -1200,6 +1205,28 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= ~(res0 | res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
+ /* TCR2_EL2 */
+ res0 = TCR2_EL2_RES0;
+ res1 = TCR2_EL2_RES1;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
+ res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128);
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP))
+ res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT))
+ res0 |= TCR2_EL2_HAFT;
+ if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+ res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
+ res0 |= TCR2_EL2_AIE;
+ if (!kvm_has_s1poe(kvm))
+ res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE;
+ if (!kvm_has_s1pie(kvm))
+ res0 |= TCR2_EL2_PIE;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
+ res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 |
+ TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1);
+ set_sysreg_masks(kvm, TCR2_EL2, res0, res1);
+
/* SCTLR_EL1 */
res0 = SCTLR_EL1_RES0;
res1 = SCTLR_EL1_RES1;
@@ -1207,6 +1234,43 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= SCTLR_EL1_EPAN;
set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+ /* MDCR_EL2 */
+ res0 = MDCR_EL2_RES0;
+ res1 = MDCR_EL2_RES1;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
+ res0 |= (MDCR_EL2_HPMN | MDCR_EL2_TPMCR |
+ MDCR_EL2_TPM | MDCR_EL2_HPME);
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
+ res0 |= MDCR_EL2_E2PB | MDCR_EL2_TPMS;
+ if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, SPMU, IMP))
+ res0 |= MDCR_EL2_EnSPM;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P1))
+ res0 |= MDCR_EL2_HPMD;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+ res0 |= MDCR_EL2_TTRF;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
+ res0 |= MDCR_EL2_HCCD | MDCR_EL2_HLP;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
+ res0 |= MDCR_EL2_E2TB;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ res0 |= MDCR_EL2_TDCC;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, MTPMU, IMP) ||
+ kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
+ res0 |= MDCR_EL2_MTPME;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P7))
+ res0 |= MDCR_EL2_HPMFZO;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSS, IMP))
+ res0 |= MDCR_EL2_PMSSE;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
+ res0 |= MDCR_EL2_HPMFZS;
+ if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, EBEP, IMP))
+ res0 |= MDCR_EL2_PMEE;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, V8P9))
+ res0 |= MDCR_EL2_EBWE;
+ if (!kvm_has_feat(kvm, ID_AA64DFR2_EL1, STEP, IMP))
+ res0 |= MDCR_EL2_EnSTEPOP;
+ set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
+
return 0;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index ac36c438b8c1..8ad62284fa23 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -89,7 +89,11 @@ static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
- u64 val = kvm_vcpu_read_pmcr(kvm_pmc_to_vcpu(pmc));
+ struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+ u64 val = kvm_vcpu_read_pmcr(vcpu);
+
+ if (kvm_pmu_counter_is_hyp(vcpu, pmc->idx))
+ return __vcpu_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HLP;
return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
(pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
@@ -111,6 +115,11 @@ static u32 counter_index_to_evtreg(u64 idx)
return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}
+static u64 kvm_pmc_read_evtreg(const struct kvm_pmc *pmc)
+{
+ return __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), counter_index_to_evtreg(pmc->idx));
+}
+
static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
@@ -244,7 +253,7 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
- unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
+ unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu);
int i;
for_each_set_bit(i, &mask, 32)
@@ -265,7 +274,37 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
irq_work_sync(&vcpu->arch.pmu.overflow_work);
}
-u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
+bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx)
+{
+ unsigned int hpmn;
+
+ if (!vcpu_has_nv(vcpu) || idx == ARMV8_PMU_CYCLE_IDX)
+ return false;
+
+ /*
+ * Programming HPMN=0 is CONSTRAINED UNPREDICTABLE if FEAT_HPMN0 isn't
+ * implemented. Since KVM's ability to emulate HPMN=0 does not directly
+ * depend on hardware (all PMU registers are trapped), make the
+ * implementation choice that all counters are included in the second
+ * range reserved for EL2/EL3.
+ */
+ hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
+ return idx >= hpmn;
+}
+
+u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu)
+{
+ u64 mask = kvm_pmu_implemented_counter_mask(vcpu);
+ u64 hpmn;
+
+ if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+ return mask;
+
+ hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
+ return mask & ~GENMASK(vcpu->kvm->arch.pmcr_n - 1, hpmn);
+}
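
kvm_pmu_counter_is_hyp() and kvm_pmu_accessible_counter_mask() partition the implemented counters around MDCR_EL2.HPMN: indices at or above HPMN belong to the EL2/EL3 range and disappear from the EL1 view, while the cycle counter always stays with EL1. A standalone sketch of that split; the cycle-counter index and GENMASK helper are stand-ins for the kernel definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_CYCLE_IDX	31	/* cycle counter sits outside the event counters */

#define GENMASK64(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

static bool counter_is_hyp(bool has_nv, unsigned int idx, unsigned int hpmn)
{
	if (!has_nv || idx == DEMO_CYCLE_IDX)
		return false;
	return idx >= hpmn;	/* second range is reserved for EL2/EL3 */
}

/* Counters an EL1 guest may touch: implemented ones below HPMN. */
static uint64_t accessible_mask(uint64_t implemented, unsigned int pmcr_n,
				unsigned int hpmn)
{
	return implemented & ~GENMASK64(pmcr_n - 1, hpmn);
}

int main(void)
{
	/* 6 event counters implemented, HPMN=4: counters 4 and 5 are EL2-only. */
	printf("hyp(5)=%d mask=%#llx\n", counter_is_hyp(true, 5, 4),
	       (unsigned long long)accessible_mask(0x3f, 6, 4));
	return 0;
}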
+
+u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu)
{
u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));
@@ -574,7 +613,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
- unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
+ unsigned long mask = kvm_pmu_accessible_counter_mask(vcpu);
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
@@ -585,8 +624,44 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
- return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) &&
- (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
+ unsigned int mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);
+
+ if (!(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)))
+ return false;
+
+ if (kvm_pmu_counter_is_hyp(vcpu, pmc->idx))
+ return mdcr & MDCR_EL2_HPME;
+
+ return kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E;
+}
+
+static bool kvm_pmc_counts_at_el0(struct kvm_pmc *pmc)
+{
+ u64 evtreg = kvm_pmc_read_evtreg(pmc);
+ bool nsu = evtreg & ARMV8_PMU_EXCLUDE_NS_EL0;
+ bool u = evtreg & ARMV8_PMU_EXCLUDE_EL0;
+
+ return u == nsu;
+}
+
+static bool kvm_pmc_counts_at_el1(struct kvm_pmc *pmc)
+{
+ u64 evtreg = kvm_pmc_read_evtreg(pmc);
+ bool nsk = evtreg & ARMV8_PMU_EXCLUDE_NS_EL1;
+ bool p = evtreg & ARMV8_PMU_EXCLUDE_EL1;
+
+ return p == nsk;
+}
+
+static bool kvm_pmc_counts_at_el2(struct kvm_pmc *pmc)
+{
+ struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+ u64 mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);
+
+ if (!kvm_pmu_counter_is_hyp(vcpu, pmc->idx) && (mdcr & MDCR_EL2_HPMD))
+ return false;
+
+ return kvm_pmc_read_evtreg(pmc) & ARMV8_PMU_INCLUDE_EL2;
}
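
The three helpers above interpret the event-type register's filter bits: an exception level is counted when its "exclude" and "exclude non-secure" bits agree, and EL2 counting additionally needs the explicit include bit plus a clear MDCR_EL2.HPMD for counters in the first range. A hedged standalone sketch of the same predicates; the bit positions are stand-ins, not the architectural encodings.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in filter bits, not the architectural layout of PMEVTYPER. */
#define F_EXCL_EL1	(1u << 0)
#define F_EXCL_EL0	(1u << 1)
#define F_EXCL_NS_EL1	(1u << 2)
#define F_EXCL_NS_EL0	(1u << 3)
#define F_INCL_EL2	(1u << 4)

static bool counts_at_el0(uint32_t evtreg)
{
	return !!(evtreg & F_EXCL_EL0) == !!(evtreg & F_EXCL_NS_EL0);
}

static bool counts_at_el1(uint32_t evtreg)
{
	return !!(evtreg & F_EXCL_EL1) == !!(evtreg & F_EXCL_NS_EL1);
}

static bool counts_at_el2(uint32_t evtreg, bool counter_is_hyp, bool hpmd)
{
	if (!counter_is_hyp && hpmd)
		return false;	/* HPMD freezes the first range at EL2 */
	return evtreg & F_INCL_EL2;
}

int main(void)
{
	uint32_t evt = F_INCL_EL2;	/* no exclude bits, EL2 explicitly included */

	printf("%d %d %d\n", counts_at_el0(evt), counts_at_el1(evt),
	       counts_at_el2(evt, true, false));
	return 0;
}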
/**
@@ -599,17 +674,15 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct perf_event *event;
struct perf_event_attr attr;
- u64 eventsel, reg, data;
- bool p, u, nsk, nsu;
+ u64 eventsel, evtreg;
- reg = counter_index_to_evtreg(pmc->idx);
- data = __vcpu_sys_reg(vcpu, reg);
+ evtreg = kvm_pmc_read_evtreg(pmc);
kvm_pmu_stop_counter(pmc);
if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
else
- eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
+ eventsel = evtreg & kvm_pmu_event_mask(vcpu->kvm);
/*
* Neither SW increment nor chained events need to be backed
@@ -627,23 +700,26 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
- p = data & ARMV8_PMU_EXCLUDE_EL1;
- u = data & ARMV8_PMU_EXCLUDE_EL0;
- nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1;
- nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0;
-
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
- attr.exclude_user = (u != nsu);
- attr.exclude_kernel = (p != nsk);
+ attr.exclude_user = !kvm_pmc_counts_at_el0(pmc);
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = eventsel;
/*
+ * Filter events at EL1 (i.e. vEL2) when in a hyp context based on the
+ * guest's EL2 filter.
+ */
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ attr.exclude_kernel = !kvm_pmc_counts_at_el2(pmc);
+ else
+ attr.exclude_kernel = !kvm_pmc_counts_at_el1(pmc);
+
+ /*
* If counting with a 64bit counter, advertise it to the perf
* code, carefully dealing with the initial sample period
* which also depends on the overflow.
@@ -804,7 +880,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
- u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+ u64 mask = kvm_pmu_implemented_counter_mask(vcpu);
kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));
@@ -1139,3 +1215,32 @@ u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}
+
+void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)
+{
+ bool reprogrammed = false;
+ unsigned long mask;
+ int i;
+
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
+ mask = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+ for_each_set_bit(i, &mask, 32) {
+ struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
+
+ /*
+ * We only need to reconfigure events where the filter is
+ * different at EL1 vs. EL2, as the perf exclude_kernel attribute is
+ * multiplexed between the guest's EL1 and EL2 filters when nested.
+ */
+ if (kvm_pmc_counts_at_el1(pmc) == kvm_pmc_counts_at_el2(pmc))
+ continue;
+
+ kvm_pmu_create_perf_event(pmc);
+ reprogrammed = true;
+ }
+
+ if (reprogrammed)
+ kvm_vcpu_pmu_restore_guest(vcpu);
+}
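
kvm_pmu_nested_transition() only rebuilds perf events whose EL1 and EL2 filters disagree, because the same perf attribute slot is reused for whichever level the vCPU currently runs at. A small standalone sketch of that decision over an enable mask, with an illustrative per-counter structure:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative per-counter filter state. */
struct demo_pmc {
	bool counts_at_el1;
	bool counts_at_el2;
};

/* Return a bitmap of enabled counters that need reprogramming. */
static uint32_t counters_to_reprogram(const struct demo_pmc *pmc,
				      uint32_t enabled_mask, int nr)
{
	uint32_t out = 0;

	for (int i = 0; i < nr; i++) {
		if (!(enabled_mask & (1u << i)))
			continue;
		/* Only filters that differ between EL1 and EL2 matter. */
		if (pmc[i].counts_at_el1 != pmc[i].counts_at_el2)
			out |= 1u << i;
	}
	return out;
}

int main(void)
{
	struct demo_pmc pmc[2] = {
		{ .counts_at_el1 = true, .counts_at_el2 = true  },
		{ .counts_at_el1 = true, .counts_at_el2 = false },
	};

	printf("%#x\n", counters_to_reprogram(pmc, 0x3, 2)); /* -> 0x2 */
	return 0;
}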
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 1f69b667332b..3b5dbe9a0a0e 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -194,6 +194,12 @@ static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0);
}
+static void kvm_psci_system_off2(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN,
+ KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+}
+
static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
{
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0);
@@ -322,7 +328,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
switch(psci_fn) {
case PSCI_0_2_FN_PSCI_VERSION:
- val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1;
+ val = PSCI_VERSION(1, minor);
break;
case PSCI_1_0_FN_PSCI_FEATURES:
arg = smccc_get_arg1(vcpu);
@@ -358,6 +364,11 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
if (minor >= 1)
val = 0;
break;
+ case PSCI_1_3_FN_SYSTEM_OFF2:
+ case PSCI_1_3_FN64_SYSTEM_OFF2:
+ if (minor >= 3)
+ val = PSCI_1_3_OFF_TYPE_HIBERNATE_OFF;
+ break;
}
break;
case PSCI_1_0_FN_SYSTEM_SUSPEND:
@@ -392,6 +403,33 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
break;
}
break;
+ case PSCI_1_3_FN_SYSTEM_OFF2:
+ kvm_psci_narrow_to_32bit(vcpu);
+ fallthrough;
+ case PSCI_1_3_FN64_SYSTEM_OFF2:
+ if (minor < 3)
+ break;
+
+ arg = smccc_get_arg1(vcpu);
+ /*
+ * SYSTEM_OFF2 defaults to HIBERNATE_OFF if arg1 is zero. arg2
+ * must be zero.
+ */
+ if ((arg && arg != PSCI_1_3_OFF_TYPE_HIBERNATE_OFF) ||
+ smccc_get_arg2(vcpu) != 0) {
+ val = PSCI_RET_INVALID_PARAMS;
+ break;
+ }
+ kvm_psci_system_off2(vcpu);
+ /*
+ * We shouldn't be going back to the guest after receiving a
+ * SYSTEM_OFF2 request. Preload a return value of
+ * INTERNAL_FAILURE should userspace ignore the exit and resume
+ * the vCPU.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
default:
return kvm_psci_0_2_call(vcpu);
}
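
The SYSTEM_OFF2 handler treats arg1 == 0 as HIBERNATE_OFF, rejects any other type, and requires arg2 to be zero before forwarding a SHUTDOWN system event to userspace. A standalone sketch of that argument check; the constants are stand-ins for the PSCI v1.3 definitions rather than the kernel's headers.

#include <stdint.h>
#include <stdio.h>

#define DEMO_HIBERNATE_OFF	  0x1	/* stand-in for the HIBERNATE_OFF type */
#define DEMO_RET_INVALID_PARAMS	  (-2)
#define DEMO_RET_INTERNAL_FAILURE (-6)

/* Returns the value preloaded for the (never expected) return to the guest. */
static long system_off2(uint64_t arg1, uint64_t arg2, int *shutdown)
{
	*shutdown = 0;

	/* Zero defaults to HIBERNATE_OFF; anything else must match it. */
	if ((arg1 && arg1 != DEMO_HIBERNATE_OFF) || arg2 != 0)
		return DEMO_RET_INVALID_PARAMS;

	*shutdown = 1;	/* userspace sees a SHUTDOWN system event */
	return DEMO_RET_INTERNAL_FAILURE;
}

int main(void)
{
	int shutdown;

	printf("%ld %d\n", system_off2(0, 0, &shutdown), shutdown);
	printf("%ld %d\n", system_off2(2, 0, &shutdown), shutdown);
	return 0;
}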
@@ -449,6 +487,10 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
}
switch (version) {
+ case KVM_ARM_PSCI_1_3:
+ return kvm_psci_1_x_call(vcpu, 3);
+ case KVM_ARM_PSCI_1_2:
+ return kvm_psci_1_x_call(vcpu, 2);
case KVM_ARM_PSCI_1_1:
return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0b0ae5ae7bc2..470524b31951 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -167,11 +167,6 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}
-static void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
-{
- vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
-}
-
/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index ff8c4e1b847e..83c6b4a07ef5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -110,6 +110,14 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
PURE_EL2_SYSREG( RVBAR_EL2 );
PURE_EL2_SYSREG( TPIDR_EL2 );
PURE_EL2_SYSREG( HPFAR_EL2 );
+ PURE_EL2_SYSREG( HCRX_EL2 );
+ PURE_EL2_SYSREG( HFGRTR_EL2 );
+ PURE_EL2_SYSREG( HFGWTR_EL2 );
+ PURE_EL2_SYSREG( HFGITR_EL2 );
+ PURE_EL2_SYSREG( HDFGRTR_EL2 );
+ PURE_EL2_SYSREG( HDFGWTR_EL2 );
+ PURE_EL2_SYSREG( HAFGRTR_EL2 );
+ PURE_EL2_SYSREG( CNTVOFF_EL2 );
PURE_EL2_SYSREG( CNTHCTL_EL2 );
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
translate_sctlr_el2_to_sctlr_el1 );
@@ -126,10 +134,15 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL );
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL );
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1, NULL );
MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
default:
return false;
}
@@ -149,6 +162,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
goto memory_read;
/*
+ * CNTHCTL_EL2 requires some special treatment to
+ * account for the bits that can be set via CNTKCTL_EL1.
+ */
+ switch (reg) {
+ case CNTHCTL_EL2:
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
+ return val;
+ }
+ break;
+ }
+
+ /*
* If this register does not have an EL1 counterpart,
* then read the stored EL2 version.
*/
@@ -165,6 +193,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
/* Get the current version of the EL1 counterpart. */
WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
+ if (reg >= __SANITISED_REG_START__)
+ val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
+
return val;
}
@@ -198,6 +229,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
*/
__vcpu_sys_reg(vcpu, reg) = val;
+ switch (reg) {
+ case CNTHCTL_EL2:
+ /*
+ * If E2H=0, CNTHCTL_EL2 is a pure shadow register.
+ * Otherwise, some of the bits are backed by
+ * CNTKCTL_EL1, while the rest is kept in memory.
+ * Yes, this is fun stuff.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu))
+ write_sysreg_el1(val, SYS_CNTKCTL);
+ return;
+ }
+
/* No EL1 counterpart? We're done here. */
if (reg == el1r)
return;
@@ -390,10 +434,6 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
bool was_enabled = vcpu_has_cache_enabled(vcpu);
u64 val, mask, shift;
- if (reg_to_encoding(r) == SYS_TCR2_EL1 &&
- !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
- return undef_access(vcpu, p, r);
-
BUG_ON(!p->is_write);
get_access_mask(r, &mask, &shift);
@@ -1128,7 +1168,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va
{
bool set;
- val &= kvm_pmu_valid_counter_mask(vcpu);
+ val &= kvm_pmu_accessible_counter_mask(vcpu);
switch (r->reg) {
case PMOVSSET_EL0:
@@ -1151,7 +1191,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va
static int get_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 *val)
{
- u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+ u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
*val = __vcpu_sys_reg(vcpu, r->reg) & mask;
return 0;
@@ -1165,7 +1205,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (pmu_access_el0_disabled(vcpu))
return false;
- mask = kvm_pmu_valid_counter_mask(vcpu);
+ mask = kvm_pmu_accessible_counter_mask(vcpu);
if (p->is_write) {
val = p->regval & mask;
if (r->Op2 & 0x1) {
@@ -1188,7 +1228,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+ u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
if (check_pmu_access_disabled(vcpu, 0))
return false;
@@ -1212,7 +1252,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+ u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
if (pmu_access_el0_disabled(vcpu))
return false;
@@ -1242,7 +1282,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (pmu_write_swinc_el0_disabled(vcpu))
return false;
- mask = kvm_pmu_valid_counter_mask(vcpu);
+ mask = kvm_pmu_accessible_counter_mask(vcpu);
kvm_pmu_software_increment(vcpu, p->regval & mask);
return true;
}
@@ -1509,6 +1549,9 @@ static u8 pmuver_to_perfmon(u8 pmuver)
}
}
+static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
+static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
+
/* Read a sanitised cpufeature ID register by sys_reg_desc */
static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
@@ -1522,6 +1565,12 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val = read_sanitised_ftr_reg(id);
switch (id) {
+ case SYS_ID_AA64DFR0_EL1:
+ val = sanitise_id_aa64dfr0_el1(vcpu, val);
+ break;
+ case SYS_ID_AA64PFR0_EL1:
+ val = sanitise_id_aa64pfr0_el1(vcpu, val);
+ break;
case SYS_ID_AA64PFR1_EL1:
if (!kvm_has_mte(vcpu->kvm))
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
@@ -1535,6 +1584,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
break;
case SYS_ID_AA64PFR2_EL1:
/* We only expose FPMR */
@@ -1692,11 +1742,8 @@ static unsigned int fp8_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
-static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
- u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
if (!vcpu_has_sve(vcpu))
val &= ~ID_AA64PFR0_EL1_SVE_MASK;
@@ -1724,6 +1771,13 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
val &= ~ID_AA64PFR0_EL1_AMU_MASK;
+ /*
+ * MPAM is disabled by default as KVM also needs a set of PARTID to
+ * program the MPAMVPMx_EL2 PARTID remapping registers with. But some
+ * older kernels let the guest see the ID bit.
+ */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+
return val;
}
@@ -1737,11 +1791,8 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
(val); \
})
-static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
- u64 val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
-
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
/*
@@ -1834,6 +1885,70 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
return set_id_reg(vcpu, rd, val);
}
+static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u64 mpam_mask = ID_AA64PFR0_EL1_MPAM_MASK;
+
+ /*
+ * Commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits
+ * in ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to
+ * guests, but didn't add trap handling. KVM doesn't support MPAM and
+ * always returns an UNDEF for these registers. The guest must see 0
+ * for this field.
+ *
+ * But KVM must also accept values from user-space that an older KVM
+ * provided. On CPUs that support MPAM, permit user-space to write the
+ * sanitised value to ID_AA64PFR0_EL1.MPAM, but ignore this field.
+ */
+ if ((hw_val & mpam_mask) == (user_val & mpam_mask))
+ user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+
+ return set_id_reg(vcpu, rd, user_val);
+}
+
+static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ u64 mpam_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK;
+
+ /* See set_id_aa64pfr0_el1 for comment about MPAM */
+ if ((hw_val & mpam_mask) == (user_val & mpam_mask))
+ user_val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+
+ return set_id_reg(vcpu, rd, user_val);
+}
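
Both MPAM setters above tolerate a userspace-written MPAM field only when it matches what the hardware reports, and then quietly drop it so the guest still sees zero; anything else falls through to the normal writable-mask check and is rejected. A minimal standalone sketch of that filter, with an illustrative field mask:

#include <stdint.h>
#include <stdio.h>

#define DEMO_MPAM_MASK	(0xfULL << 40)	/* stand-in field mask */

/* Strip a userspace MPAM value iff it merely mirrors the hardware value. */
static uint64_t filter_mpam(uint64_t hw_val, uint64_t user_val)
{
	if ((hw_val & DEMO_MPAM_MASK) == (user_val & DEMO_MPAM_MASK))
		user_val &= ~DEMO_MPAM_MASK;	/* accepted, but hidden */
	return user_val;	/* otherwise left alone and rejected later */
}

int main(void)
{
	uint64_t hw = 1ULL << 40;

	printf("%#llx\n", (unsigned long long)filter_mpam(hw, 1ULL << 40));
	printf("%#llx\n", (unsigned long long)filter_mpam(hw, 2ULL << 40));
	return 0;
}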
+
+static int set_ctr_el0(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u8 user_L1Ip = SYS_FIELD_GET(CTR_EL0, L1Ip, user_val);
+
+ /*
+ * Both AIVIVT (0b01) and VPIPT (0b00) are documented as reserved.
+ * Hence only allow to set VIPT(0b10) or PIPT(0b11) for L1Ip based
+ * on what hardware reports.
+ *
+ * Using a VIPT software model on PIPT hardware only leads to
+ * over-invalidation, which is still correct. Hence, allow downgrading
+ * PIPT to VIPT, but not the other way around. This is enforced via
+ * arm64_ftr_safe_value(), since the CTR_EL0 ftr_bits entry for L1Ip
+ * has type FTR_EXACT with VIPT as the safe value.
+ */
+ switch (user_L1Ip) {
+ case CTR_EL0_L1Ip_RESERVED_VPIPT:
+ case CTR_EL0_L1Ip_RESERVED_AIVIVT:
+ return -EINVAL;
+ case CTR_EL0_L1Ip_VIPT:
+ case CTR_EL0_L1Ip_PIPT:
+ return set_id_reg(vcpu, rd, user_val);
+ default:
+ return -ENOENT;
+ }
+}
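
set_ctr_el0() only lets userspace pick VIPT or PIPT for L1Ip; the two reserved encodings are rejected outright, and the PIPT-to-VIPT downgrade rule is left to the generic ftr_bits machinery. A standalone sketch of the same switch; the 2-bit encodings follow the comment above (0b10 = VIPT, 0b11 = PIPT).

#include <errno.h>
#include <stdio.h>

enum demo_l1ip {
	L1IP_RESERVED_VPIPT  = 0, /* 0b00, reserved */
	L1IP_RESERVED_AIVIVT = 1, /* 0b01, reserved */
	L1IP_VIPT            = 2, /* 0b10 */
	L1IP_PIPT            = 3, /* 0b11 */
};

static int check_l1ip(unsigned int l1ip)
{
	switch (l1ip) {
	case L1IP_RESERVED_VPIPT:
	case L1IP_RESERVED_AIVIVT:
		return -EINVAL;
	case L1IP_VIPT:
	case L1IP_PIPT:
		return 0;	/* further safety checks happen elsewhere */
	default:
		return -ENOENT;	/* field wider than expected */
	}
}

int main(void)
{
	printf("%d %d\n", check_l1ip(L1IP_PIPT), check_l1ip(L1IP_RESERVED_AIVIVT));
	return 0;
}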
+
/*
* cpufeature ID register user accessors
*
@@ -2104,6 +2219,15 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = v, \
}
+#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \
+ SYS_DESC(SYS_##name), \
+ .access = acc, \
+ .reset = rst, \
+ .reg = name, \
+ .visibility = filter, \
+ .val = v, \
+}
+
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
@@ -2150,6 +2274,15 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = mask, \
}
+/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
+#define ID_FILTERED(sysreg, name, mask) { \
+ ID_DESC(sysreg), \
+ .set_user = set_##name, \
+ .visibility = id_visibility, \
+ .reset = kvm_read_sanitised_id_reg, \
+ .val = (mask), \
+}
+
/*
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
@@ -2236,16 +2369,18 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return __vcpu_sys_reg(vcpu, r->reg) = val;
}
+static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ unsigned int (*fn)(const struct kvm_vcpu *,
+ const struct sys_reg_desc *))
+{
+ return el2_visibility(vcpu, rd) ?: fn(vcpu, rd);
+}
+
static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- unsigned int r;
-
- r = el2_visibility(vcpu, rd);
- if (r)
- return r;
-
- return sve_visibility(vcpu, rd);
+ return __el2_visibility(vcpu, rd, sve_visibility);
}
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
@@ -2273,12 +2408,48 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (kvm_has_s1poe(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int s1poe_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, s1poe_visibility);
+}
+
+static unsigned int tcr2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_tcr2(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, tcr2_visibility);
+}
+
+static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_s1pie(vcpu->kvm))
return 0;
return REG_HIDDEN;
}
+static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, s1pie_visibility);
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2374,18 +2545,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 ID registers */
/* CRm=4 */
- { SYS_DESC(SYS_ID_AA64PFR0_EL1),
- .access = access_id_reg,
- .get_user = get_id_reg,
- .set_user = set_id_reg,
- .reset = read_sanitised_id_aa64pfr0_el1,
- .val = ~(ID_AA64PFR0_EL1_AMU |
- ID_AA64PFR0_EL1_MPAM |
- ID_AA64PFR0_EL1_SVE |
- ID_AA64PFR0_EL1_RAS |
- ID_AA64PFR0_EL1_AdvSIMD |
- ID_AA64PFR0_EL1_FP), },
- ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR |
+ ID_FILTERED(ID_AA64PFR0_EL1, id_aa64pfr0_el1,
+ ~(ID_AA64PFR0_EL1_AMU |
+ ID_AA64PFR0_EL1_MPAM |
+ ID_AA64PFR0_EL1_SVE |
+ ID_AA64PFR0_EL1_RAS |
+ ID_AA64PFR0_EL1_AdvSIMD |
+ ID_AA64PFR0_EL1_FP)),
+ ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1,
+ ~(ID_AA64PFR1_EL1_PFAR |
ID_AA64PFR1_EL1_DF2 |
ID_AA64PFR1_EL1_MTEX |
ID_AA64PFR1_EL1_THE |
@@ -2406,11 +2574,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_WRITABLE(ID_AA64FPFR0_EL1, ~ID_AA64FPFR0_EL1_RES0),
/* CRm=5 */
- { SYS_DESC(SYS_ID_AA64DFR0_EL1),
- .access = access_id_reg,
- .get_user = get_id_reg,
- .set_user = set_id_aa64dfr0_el1,
- .reset = read_sanitised_id_aa64dfr0_el1,
/*
* Prior to FEAT_Debugv8.9, the architecture defines context-aware
* breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs).
@@ -2423,10 +2586,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* See DDI0487K.a, section D2.8.3 Breakpoint types and linking
* of breakpoints for more details.
*/
- .val = ID_AA64DFR0_EL1_DoubleLock_MASK |
- ID_AA64DFR0_EL1_WRPs_MASK |
- ID_AA64DFR0_EL1_PMUVer_MASK |
- ID_AA64DFR0_EL1_DebugVer_MASK, },
+ ID_FILTERED(ID_AA64DFR0_EL1, id_aa64dfr0_el1,
+ ID_AA64DFR0_EL1_DoubleLock_MASK |
+ ID_AA64DFR0_EL1_WRPs_MASK |
+ ID_AA64DFR0_EL1_PMUVer_MASK |
+ ID_AA64DFR0_EL1_DebugVer_MASK),
ID_SANITISED(ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
ID_UNALLOCATED(5,3),
@@ -2489,7 +2653,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
- { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 },
+ { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0,
+ .visibility = tcr2_visibility },
PTRAUTH_KEY(APIA),
PTRAUTH_KEY(APIB),
@@ -2543,8 +2708,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
- { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
- { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1,
+ .visibility = s1pie_visibility },
+ { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1,
+ .visibility = s1pie_visibility },
{ SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1,
.visibility = s1poe_visibility },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@@ -2553,8 +2720,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_LOREA_EL1), trap_loregion },
{ SYS_DESC(SYS_LORN_EL1), trap_loregion },
{ SYS_DESC(SYS_LORC_EL1), trap_loregion },
+ { SYS_DESC(SYS_MPAMIDR_EL1), undef_access },
{ SYS_DESC(SYS_LORID_EL1), trap_loregion },
+ { SYS_DESC(SYS_MPAM1_EL1), undef_access },
+ { SYS_DESC(SYS_MPAM0_EL1), undef_access },
{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
@@ -2599,10 +2769,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
- ID_WRITABLE(CTR_EL0, CTR_EL0_DIC_MASK |
- CTR_EL0_IDC_MASK |
- CTR_EL0_DminLine_MASK |
- CTR_EL0_IminLine_MASK),
+ ID_FILTERED(CTR_EL0, ctr_el0,
+ CTR_EL0_DIC_MASK |
+ CTR_EL0_IDC_MASK |
+ CTR_EL0_DminLine_MASK |
+ CTR_EL0_L1Ip_MASK |
+ CTR_EL0_IminLine_MASK),
{ SYS_DESC(SYS_SVCR), undef_access, reset_val, SVCR, 0, .visibility = sme_visibility },
{ SYS_DESC(SYS_FPMR), undef_access, reset_val, FPMR, 0, .visibility = fp8_visibility },
@@ -2818,14 +2990,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
EL2_REG_VNCR(HACR_EL2, reset_val, 0),
- { SYS_DESC(SYS_ZCR_EL2), .access = access_zcr_el2, .reset = reset_val,
- .visibility = sve_el2_visibility, .reg = ZCR_EL2 },
+ EL2_REG_FILTERED(ZCR_EL2, access_zcr_el2, reset_val, 0,
+ sve_el2_visibility),
EL2_REG_VNCR(HCRX_EL2, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
+ EL2_REG_FILTERED(TCR2_EL2, access_rw, reset_val, TCR2_EL2_RES1,
+ tcr2_el2_visibility),
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
@@ -2853,7 +3027,24 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
+ EL2_REG_FILTERED(PIRE0_EL2, access_rw, reset_val, 0,
+ s1pie_el2_visibility),
+ EL2_REG_FILTERED(PIR_EL2, access_rw, reset_val, 0,
+ s1pie_el2_visibility),
+ EL2_REG_FILTERED(POR_EL2, access_rw, reset_val, 0,
+ s1poe_el2_visibility),
EL2_REG(AMAIR_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_MPAMHCR_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPMV_EL2), undef_access },
+ { SYS_DESC(SYS_MPAM2_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM0_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM1_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM2_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM3_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM4_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM5_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM6_EL2), undef_access },
+ { SYS_DESC(SYS_MPAMVPM7_EL2), undef_access },
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
@@ -4719,7 +4910,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
- if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ if (kvm_has_tcr2(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
if (kvm_has_fpmr(kvm))
@@ -4769,11 +4960,11 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
HFGITR_EL2_ATS1E1WP);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+ if (!kvm_has_s1pie(kvm))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (!kvm_has_s1poe(kvm))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
HFGxTR_EL2_nPOR_EL0);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index ba945ba78cc7..198296933e7e 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -782,6 +782,9 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
ite = find_ite(its, device_id, event_id);
if (ite && its_is_collection_mapped(ite->collection)) {
+ struct its_device *device = find_its_device(its, device_id);
+ int ite_esz = vgic_its_get_abi(its)->ite_esz;
+ gpa_t gpa = device->itt_addr + ite->event_id * ite_esz;
/*
* Though the spec talks about removing the pending state, we
* don't bother here since we clear the ITTE anyway and the
@@ -790,7 +793,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
vgic_its_invalidate_cache(its);
its_free_ite(kvm, ite);
- return 0;
+
+ return vgic_its_write_entry_lock(its, gpa, 0, ite_esz);
}
return E_ITS_DISCARD_UNMAPPED_INTERRUPT;
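
The DISCARD handler now computes where the interrupt translation entry lives in guest memory (ITT base plus event index times entry size) so the on-disk copy can be zeroed along with the in-kernel one. A minimal standalone sketch of that address computation, using an illustrative gpa type:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t demo_gpa_t;

/* Guest address of one interrupt translation entry within a device's ITT. */
static demo_gpa_t ite_addr(demo_gpa_t itt_addr, uint32_t event_id, int ite_esz)
{
	return itt_addr + (demo_gpa_t)event_id * ite_esz;
}

int main(void)
{
	/* 8-byte entries: event 5 sits 40 bytes into the table. */
	printf("%#llx\n", (unsigned long long)ite_addr(0x80000000, 5, 8));
	return 0;
}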
@@ -1139,9 +1143,11 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
bool valid = its_cmd_get_validbit(its_cmd);
u8 num_eventid_bits = its_cmd_get_size(its_cmd);
gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
+ int dte_esz = vgic_its_get_abi(its)->dte_esz;
struct its_device *device;
+ gpa_t gpa;
- if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
+ if (!vgic_its_check_id(its, its->baser_device_table, device_id, &gpa))
return E_ITS_MAPD_DEVICE_OOR;
if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
@@ -1162,7 +1168,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
* is an error, so we are done in any case.
*/
if (!valid)
- return 0;
+ return vgic_its_write_entry_lock(its, gpa, 0, dte_esz);
device = vgic_its_alloc_device(its, device_id, itt_addr,
num_eventid_bits);
@@ -2086,7 +2092,6 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
struct its_ite *ite, gpa_t gpa, int ite_esz)
{
- struct kvm *kvm = its->dev->kvm;
u32 next_offset;
u64 val;
@@ -2095,7 +2100,8 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
ite->collection->collection_id;
val = cpu_to_le64(val);
- return vgic_write_guest_lock(kvm, gpa, &val, ite_esz);
+
+ return vgic_its_write_entry_lock(its, gpa, val, ite_esz);
}
/**
@@ -2239,7 +2245,6 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
gpa_t ptr, int dte_esz)
{
- struct kvm *kvm = its->dev->kvm;
u64 val, itt_addr_field;
u32 next_offset;
@@ -2250,7 +2255,8 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
(itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
(dev->num_eventid_bits - 1));
val = cpu_to_le64(val);
- return vgic_write_guest_lock(kvm, ptr, &val, dte_esz);
+
+ return vgic_its_write_entry_lock(its, ptr, val, dte_esz);
}
/**
@@ -2437,7 +2443,8 @@ static int vgic_its_save_cte(struct vgic_its *its,
((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
collection->collection_id);
val = cpu_to_le64(val);
- return vgic_write_guest_lock(its->dev->kvm, gpa, &val, esz);
+
+ return vgic_its_write_entry_lock(its, gpa, val, esz);
}
/*
@@ -2453,8 +2460,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
u64 val;
int ret;
- BUG_ON(esz > sizeof(val));
- ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
+ ret = vgic_its_read_entry_lock(its, gpa, &val, esz);
if (ret)
return ret;
val = le64_to_cpu(val);
@@ -2492,7 +2498,6 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
u64 baser = its->baser_coll_table;
gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
struct its_collection *collection;
- u64 val;
size_t max_size, filled = 0;
int ret, cte_esz = abi->cte_esz;
@@ -2516,10 +2521,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
* table is not fully filled, add a last dummy element
* with valid bit unset
*/
- val = 0;
- BUG_ON(cte_esz > sizeof(val));
- ret = vgic_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
- return ret;
+ return vgic_its_write_entry_lock(its, gpa, 0, cte_esz);
}
/*
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index f2486b4d9f95..309295f5e1b0 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -146,6 +146,29 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa,
return ret;
}
+static inline int vgic_its_read_entry_lock(struct vgic_its *its, gpa_t eaddr,
+ u64 *eval, unsigned long esize)
+{
+ struct kvm *kvm = its->dev->kvm;
+
+ if (KVM_BUG_ON(esize != sizeof(*eval), kvm))
+ return -EINVAL;
+
+ return kvm_read_guest_lock(kvm, eaddr, eval, esize);
+
+}
+
+static inline int vgic_its_write_entry_lock(struct vgic_its *its, gpa_t eaddr,
+ u64 eval, unsigned long esize)
+{
+ struct kvm *kvm = its->dev->kvm;
+
+ if (KVM_BUG_ON(esize != sizeof(eval), kvm))
+ return -EINVAL;
+
+ return vgic_write_guest_lock(kvm, eaddr, &eval, esize);
+}
+
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC