diff options
Diffstat (limited to 'tools/testing/selftests/kvm/lib')
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic.c) | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_private.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_private.h) | 25 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.c) | 106 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c) | 30 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/handlers.S (renamed from tools/testing/selftests/kvm/lib/aarch64/handlers.S) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/processor.c (renamed from tools/testing/selftests/kvm/lib/aarch64/processor.c) | 377 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/spinlock.c (renamed from tools/testing/selftests/kvm/lib/aarch64/spinlock.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/ucall.c (renamed from tools/testing/selftests/kvm/lib/aarch64/ucall.c) | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/vgic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/vgic.c) | 100 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/assert.c | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/elf.c | 19 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/guest_modes.c | 43 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/guest_sprintf.c | 18 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 849 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/exception.S | 65 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/processor.c | 402 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/ucall.c | 38 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/lru_gen_util.c | 387 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/memstress.c | 44 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/handlers.S | 139 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/processor.c | 259 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c (renamed from tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c) | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/facility.c (renamed from tools/testing/selftests/kvm/lib/s390x/facility.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/processor.c (renamed from tools/testing/selftests/kvm/lib/s390x/processor.c) | 84 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/ucall.c (renamed from tools/testing/selftests/kvm/lib/s390x/ucall.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/sparsebit.c | 22 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/test_util.c | 79 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/ucall_common.c | 34 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/userfaultfd_util.c | 16 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/apic.c (renamed from tools/testing/selftests/kvm/lib/x86_64/apic.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/handlers.S (renamed from tools/testing/selftests/kvm/lib/x86_64/handlers.S) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/hyperv.c (renamed from tools/testing/selftests/kvm/lib/x86_64/hyperv.c) | 14 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/memstress.c (renamed from tools/testing/selftests/kvm/lib/x86_64/memstress.c) | 73 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/pmu.c | 80 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/processor.c (renamed from tools/testing/selftests/kvm/lib/x86_64/processor.c) | 588 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/sev.c (renamed from tools/testing/selftests/kvm/lib/x86_64/sev.c) | 88 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/svm.c (renamed from tools/testing/selftests/kvm/lib/x86_64/svm.c) | 44 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/ucall.c (renamed from tools/testing/selftests/kvm/lib/x86_64/ucall.c) | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/vmx.c (renamed from tools/testing/selftests/kvm/lib/x86_64/vmx.c) | 298 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86_64/pmu.c | 31 |
40 files changed, 2881 insertions, 1523 deletions
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 7abbf8866512..011dfe1dfcb3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -50,7 +50,7 @@ static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) void gic_init(enum gic_type type, unsigned int nr_cpus) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); GUEST_ASSERT(nr_cpus); @@ -73,7 +73,7 @@ void gic_irq_disable(unsigned int intid) unsigned int gic_get_and_ack_irq(void) { - uint64_t irqstat; + u64 irqstat; unsigned int intid; GUEST_ASSERT(gic_common_ops); @@ -102,7 +102,7 @@ void gic_set_eoi_split(bool split) gic_common_ops->gic_set_eoi_split(split); } -void gic_set_priority_mask(uint64_t pmr) +void gic_set_priority_mask(u64 pmr) { GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_set_priority_mask(pmr); @@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_irq_set_config(intid, is_edge); } + +void gic_irq_set_group(unsigned int intid, bool group) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_group(intid, group); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index d24e9ecc96c6..6d393f5c5685 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -12,19 +12,20 @@ struct gic_common_ops { void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); - uint64_t (*gic_read_iar)(void); - void (*gic_write_eoir)(uint32_t irq); - void (*gic_write_dir)(uint32_t irq); + u64 (*gic_read_iar)(void); + void (*gic_write_eoir)(u32 irq); + void (*gic_write_dir)(u32 irq); void (*gic_set_eoi_split)(bool split); - void (*gic_set_priority_mask)(uint64_t mask); - void (*gic_set_priority)(uint32_t intid, uint32_t prio); - void (*gic_irq_set_active)(uint32_t intid); - void (*gic_irq_clear_active)(uint32_t intid); - bool (*gic_irq_get_active)(uint32_t intid); - void (*gic_irq_set_pending)(uint32_t intid); - void (*gic_irq_clear_pending)(uint32_t intid); - bool (*gic_irq_get_pending)(uint32_t intid); - void (*gic_irq_set_config)(uint32_t intid, bool is_edge); + void (*gic_set_priority_mask)(u64 mask); + void (*gic_set_priority)(u32 intid, u32 prio); + void (*gic_irq_set_active)(u32 intid); + void (*gic_irq_clear_active)(u32 intid); + bool (*gic_irq_get_active)(u32 intid); + void (*gic_irq_set_pending)(u32 intid); + void (*gic_irq_clear_pending)(u32 intid); + bool (*gic_irq_get_pending)(u32 intid); + void (*gic_irq_set_config)(u32 intid, bool is_edge); + void (*gic_irq_set_group)(u32 intid, bool group); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 66d05506f78b..a99a53accfe9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -50,13 +50,13 @@ static void gicv3_gicd_wait_for_rwp(void) } } -static inline volatile void *gicr_base_cpu(uint32_t cpu) +static inline volatile void *gicr_base_cpu(u32 cpu) { /* Align all the redistributors sequentially */ return GICR_BASE_GVA + cpu * SZ_64K * 2; } -static void gicv3_gicr_wait_for_rwp(uint32_t cpu) +static void gicv3_gicr_wait_for_rwp(u32 cpu) { unsigned int count = 100000; /* 1s */ @@ -66,7 +66,7 @@ static void gicv3_gicr_wait_for_rwp(uint32_t cpu) } } -static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +static void gicv3_wait_for_rwp(u32 cpu_or_dist) { if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); @@ -91,34 +91,34 @@ static enum gicv3_intid_range get_intid_range(unsigned int intid) return INVALID_RANGE; } -static uint64_t gicv3_read_iar(void) +static u64 gicv3_read_iar(void) { - uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); + u64 irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); dsb(sy); return irqstat; } -static void gicv3_write_eoir(uint32_t irq) +static void gicv3_write_eoir(u32 irq) { write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); isb(); } -static void gicv3_write_dir(uint32_t irq) +static void gicv3_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); } -static void gicv3_set_priority_mask(uint64_t mask) +static void gicv3_set_priority_mask(u64 mask) { write_sysreg_s(mask, SYS_ICC_PMR_EL1); } static void gicv3_set_eoi_split(bool split) { - uint32_t val; + u32 val; /* * All other fields are read-only, so no need to read CTLR first. In @@ -129,29 +129,29 @@ static void gicv3_set_eoi_split(bool split) isb(); } -uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +u32 gicv3_reg_readl(u32 cpu_or_dist, u64 offset) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } -void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +void gicv3_reg_writel(u32 cpu_or_dist, u64 offset, u32 reg_val) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } -uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +u32 gicv3_getl_fields(u32 cpu_or_dist, u64 offset, u32 mask) { return gicv3_reg_readl(cpu_or_dist, offset) & mask; } -void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, - uint32_t mask, uint32_t reg_val) +void gicv3_setl_fields(u32 cpu_or_dist, u64 offset, + u32 mask, u32 reg_val) { - uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + u32 tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; tmp |= (reg_val & mask); gicv3_reg_writel(cpu_or_dist, offset, tmp); @@ -165,14 +165,14 @@ void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being * marked as "Reserved" in the Distributor map. */ -static void gicv3_access_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, - bool write, uint32_t *val) +static void gicv3_access_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, + bool write, u32 *val) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); enum gicv3_intid_range intid_range = get_intid_range(intid); - uint32_t fields_per_reg, index, mask, shift; - uint32_t cpu_or_dist; + u32 fields_per_reg, index, mask, shift; + u32 cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); GUEST_ASSERT(!write || *val < (1U << bits_per_field)); @@ -197,32 +197,32 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; } -static void gicv3_write_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +static void gicv3_write_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, u32 val) { gicv3_access_reg(intid, offset, reg_bits, bits_per_field, true, &val); } -static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field) +static u32 gicv3_read_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field) { - uint32_t val; + u32 val; gicv3_access_reg(intid, offset, reg_bits, bits_per_field, false, &val); return val; } -static void gicv3_set_priority(uint32_t intid, uint32_t prio) +static void gicv3_set_priority(u32 intid, u32 prio) { gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); } /* Sets the intid to be level-sensitive or edge-triggered. */ -static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +static void gicv3_irq_set_config(u32 intid, bool is_edge) { - uint32_t val; + u32 val; /* N/A for private interrupts. */ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); @@ -230,57 +230,57 @@ static void gicv3_irq_set_config(uint32_t intid, bool is_edge) gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); } -static void gicv3_irq_enable(uint32_t intid) +static void gicv3_irq_enable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_disable(uint32_t intid) +static void gicv3_irq_disable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_set_active(uint32_t intid) +static void gicv3_irq_set_active(u32 intid) { gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); } -static void gicv3_irq_clear_active(uint32_t intid) +static void gicv3_irq_clear_active(u32 intid) { gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); } -static bool gicv3_irq_get_active(uint32_t intid) +static bool gicv3_irq_get_active(u32 intid) { return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); } -static void gicv3_irq_set_pending(uint32_t intid) +static void gicv3_irq_set_pending(u32 intid) { gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); } -static void gicv3_irq_clear_pending(uint32_t intid) +static void gicv3_irq_clear_pending(u32 intid) { gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); } -static bool gicv3_irq_get_pending(uint32_t intid) +static bool gicv3_irq_get_pending(u32 intid) { return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } static void gicv3_enable_redist(volatile void *redist_base) { - uint32_t val = readl(redist_base + GICR_WAKER); + u32 val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ val &= ~GICR_WAKER_ProcessorSleep; @@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base) } } +static void gicv3_set_group(u32 intid, bool grp) +{ + u32 cpu_or_dist; + u32 val; + + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); + if (grp) + val |= BIT(intid % 32); + else + val &= ~BIT(intid % 32); + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); +} + static void gicv3_cpu_init(unsigned int cpu) { volatile void *sgi_base; unsigned int i; volatile void *redist_base_cpu; + u64 typer; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); + /* Verify assumption that GICR_TYPER.Processor_number == cpu */ + typer = readq_relaxed(redist_base_cpu + GICR_TYPER); + GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu); + gicv3_enable_redist(redist_base_cpu); /* @@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu) /* Set a default priority threshold */ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); + /* Disable Group-0 interrupts */ + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); /* Enable non-secure Group-1 interrupts */ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } @@ -400,10 +421,11 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_clear_pending = gicv3_irq_clear_pending, .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, + .gic_irq_set_group = gicv3_set_group, }; -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table) +void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size, + gpa_t pend_table) { volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..1188b578121d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -52,7 +54,7 @@ static unsigned long its_find_baser(unsigned int type) return -1; } -static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +static void its_install_table(unsigned int type, gpa_t base, size_t size) { unsigned long offset = its_find_baser(type); u64 baser; @@ -67,7 +69,7 @@ static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) its_write_u64(offset, baser); } -static void its_install_cmdq(vm_paddr_t base, size_t size) +static void its_install_cmdq(gpa_t base, size_t size) { u64 cbaser; @@ -80,9 +82,8 @@ static void its_install_cmdq(vm_paddr_t base, size_t size) its_write_u64(GITS_CBASER, cbaser); } -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size) +void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl, + size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size) { u32 ctlr; @@ -163,6 +164,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -197,7 +203,7 @@ static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) } } -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base, size_t itt_size, bool valid) { struct its_cmd_block cmd = {}; @@ -217,7 +223,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); @@ -246,3 +252,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id) its_send_cmd(cmdq_base, &cmd); } + +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_SYNC); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S index 0e443eadfac6..0e443eadfac6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 698e34f39241..01325bf4d36f 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -12,52 +12,48 @@ #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" +#include "vgic.h" #include <linux/bitfield.h> #include <linux/sizes.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +static u64 pgd_index(struct kvm_vm *vm, gva_t gva) { - return (v + vm->page_size) & ~(vm->page_size - 1); -} - -static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + u64 mask = (1UL << (vm->va_bits - shift)) - 1; return (gva >> shift) & mask; } -static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pud_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels == 4, + TEST_ASSERT(vm->mmu.pgtable_levels == 4, "Mode %d does not have 4 page table levels", vm->mode); return (gva >> shift) & mask; } -static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pmd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels >= 3, + TEST_ASSERT(vm->mmu.pgtable_levels >= 3, "Mode %d does not have >= 3 page table levels", vm->mode); return (gva >> shift) & mask; } -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pte_index(struct kvm_vm *vm, gva_t gva) { - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; return (gva >> vm->page_shift) & mask; } @@ -67,137 +63,150 @@ static inline bool use_lpa2_pte_format(struct kvm_vm *vm) (vm->pa_bits > 48 || vm->va_bits > 48); } -static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) +static u64 addr_pte(struct kvm_vm *vm, u64 pa, u64 attrs) { - uint64_t pte; + u64 pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; return pte; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) +static u64 pte_addr(struct kvm_vm *vm, u64 pte) { - uint64_t pa; + u64 pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; } -static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +static u64 ptrs_per_pgd(struct kvm_vm *vm) { - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; return 1 << (vm->va_bits - shift); } -static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) +static u64 __maybe_unused ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } -static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t flags) +static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, + u64 flags) { - uint8_t attr_idx = flags & 7; - uint64_t *ptep; + u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + u64 pg_attr; + u64 *ptep; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; break; default: TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, gpa, pg_attr); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - uint64_t attr_idx = MT_NORMAL; + u64 attr_idx = MT_NORMAL; - _virt_pg_map(vm, vaddr, paddr, attr_idx); + _virt_pg_map(vm, gva, gpa, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) { - uint64_t *ptep; + u64 *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -215,18 +224,23 @@ unmapped_gva: exit(EXIT_FAILURE); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep = virt_get_pte_hva(vm, gva); + u64 *ptep = virt_get_pte_hva(vm, gva); return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) { #ifdef DEBUG static const char * const type[] = { "", "pud", "pmd", "pte" }; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level == 4) return; @@ -241,15 +255,15 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - int level = 4 - (vm->pgtable_levels - 1); - uint64_t pgd, *ptep; + int level = 4 - (vm->mmu.pgtable_levels - 1); + u64 pgd, *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -258,110 +272,136 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; - uint64_t sctlr_el1, tcr_el1, ttbr0_el1; + u64 sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); + ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift); /* Configure output size */ switch (vm->mode) { case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ - ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; + tcr_el1 |= TCR_IPS_52_BITS; + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); + tcr_el1 |= TCR_TBI1; + tcr_el1 |= TCR_EPD1_MASK; if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { - uint64_t pstate, pc; + u64 pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); @@ -369,29 +409,29 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) { - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); } -static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init) { size_t stack_size; - uint64_t stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_gva + stack_size); return vcpu; } -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); @@ -401,7 +441,7 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { return __aarch64_vcpu_add(vm, vcpu_id, NULL); } @@ -418,13 +458,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) for (i = 0; i < num; i++) { vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), - va_arg(ap, uint64_t)); + va_arg(ap, u64)); } va_end(ap); } -void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +void kvm_exit_unexpected_exception(int vector, u64 ec, bool valid_ec) { ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); while (1) @@ -457,7 +497,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (u64)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -495,10 +535,10 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, @@ -522,13 +562,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, handlers->exception_handlers[vector][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return read_sysreg(tpidr_el1); } -static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, - uint32_t not_sup_val, uint32_t ipa52_min_val) +static u32 max_ipa_for_page_size(u32 vm_ipa, u32 gran, + u32 not_sup_val, u32 ipa52_min_val) { if (gran == not_sup_val) return 0; @@ -538,16 +578,16 @@ static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, return min(vm_ipa, 48U); } -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k) +void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k, + u32 *ipa16k, u32 *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; - uint32_t gran; + u64 val; + u32 gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), - .addr = (uint64_t)&val, + .addr = (u64)&val, }; kvm_fd = open_kvm_dev_path_or_exit(); @@ -565,15 +605,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); @@ -605,17 +645,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_hvc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); } -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_smc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); @@ -630,7 +670,7 @@ void kvm_selftest_arch_init(void) guest_modes_append_default(); } -void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +void vm_populate_gva_bitmap(struct kvm_vm *vm) { /* * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space @@ -645,3 +685,44 @@ void wfi(void) { asm volatile("wfi"); } + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c index a076e780be5d..a076e780be5d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..e0550ad5aa75 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -6,17 +6,17 @@ */ #include "kvm_util.h" -vm_vaddr_t *ucall_exit_mmio_addr; +gva_t *ucall_exit_mmio_addr; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { - vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); vm->ucall_mmio_addr = mmio_gpa; - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) @@ -25,9 +25,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), "Unexpected ucall exit mmio address access"); - return (void *)(*((uint64_t *)run->mmio.data)); + return (void *)(*((u64 *)run->mmio.data)); } return NULL; diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..4ecebf3146a2 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -15,6 +15,17 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * @@ -30,24 +41,11 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) { int gic_fd; - uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); + u64 attr; + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); @@ -73,18 +68,47 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. + */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { - uint64_t attr = 32 * (intid / 32); - uint64_t index = intid % 32; - uint64_t val; + u64 attr = 32 * (intid / 32); + u64 index = intid % 32; + u64 val; int ret; ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, @@ -98,16 +122,16 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) return ret; } -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +void kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { int ret = _kvm_irq_set_level_info(gic_fd, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); } -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { - uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + u32 irq = intid & KVM_ARM_IRQ_NUM_MASK; TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " "doesn't allow injecting SGIs. There's no mask for it."); @@ -120,23 +144,23 @@ int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) return _kvm_irq_line(vm, irq, level); } -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { int ret = _kvm_arm_irq_line(vm, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); } -static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, - uint64_t reg_off) +static void vgic_poke_irq(int gic_fd, u32 intid, struct kvm_vcpu *vcpu, + u64 reg_off) { - uint64_t reg = intid / 32; - uint64_t index = intid % 32; - uint64_t attr = reg_off + reg * 4; - uint64_t val; + u64 reg = intid / 32; + u64 index = intid % 32; + u64 attr = reg_off + reg * 4; + u64 val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + u32 group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; if (intid_is_private) { @@ -159,12 +183,12 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, kvm_device_attr_set(gic_fd, group, attr, &val); } -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); } -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658c60..8be0d09ecf0f 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,14 @@ */ #include "test_util.h" -#include <execinfo.h> + #include <sys/syscall.h> #include "kselftest.h" +#ifdef __GLIBC__ +#include <execinfo.h> + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) @@ -57,6 +60,9 @@ static void test_dump_stack(void) system(cmd); #pragma GCC diagnostic pop } +#else +static void test_dump_stack(void) {} +#endif static pid_t _gettid(void) { diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index f34d926d9735..1924a9895834 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -7,7 +7,7 @@ #include "test_util.h" -#include <bits/endian.h> +#include <endian.h> #include <linux/elf.h> #include "kvm_util.h" @@ -156,21 +156,20 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, - n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); - vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; + n1, (u64)phdr.p_memsz); + gva_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); + gva_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, - MEM_REGION_CODE); - TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " + gva_t gva = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); + TEST_ASSERT(gva == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" " seg_vstart: 0x%lx\n" - " vaddr: 0x%lx", - n1, seg_vstart, vaddr); - memset(addr_gva2hva(vm, vaddr), 0, seg_size); + " gva: 0x%lx", + n1, seg_vstart, gva); + memset(addr_gva2hva(vm, gva), 0, seg_size); /* TODO(lhuemill): Set permissions of each memory segment * based on the least-significant 3 bits of phdr.p_flags. */ diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index b04901e55138..7a96c43b5704 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -4,7 +4,7 @@ */ #include "guest_modes.h" -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(__riscv) #include "processor.h" enum vm_guest_mode vm_mode_default; #endif @@ -13,12 +13,14 @@ struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { -#ifndef __aarch64__ +#if !defined(__aarch64__) && !defined(__riscv) guest_mode_append(VM_MODE_DEFAULT, true); -#else +#endif + +#ifdef __aarch64__ { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - uint32_t ipa4k, ipa16k, ipa64k; + u32 ipa4k, ipa16k, ipa64k; int i; aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); @@ -74,11 +76,36 @@ void guest_modes_append_default(void) #ifdef __riscv { unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); + unsigned long satp_mode = riscv64_get_satp_mode() << SATP_MODE_SHIFT; + int i; - if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true); - if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true); + switch (sz) { + case 59: + guest_mode_append(VM_MODE_P56V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P56V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P56V39_4K, satp_mode >= SATP_MODE_39); + break; + case 50: + guest_mode_append(VM_MODE_P50V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P50V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P50V39_4K, satp_mode >= SATP_MODE_39); + break; + case 41: + guest_mode_append(VM_MODE_P41V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P41V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P41V39_4K, satp_mode >= SATP_MODE_39); + break; + default: + break; + } + + /* set the first supported mode as default */ + vm_mode_default = NUM_VM_MODES; + for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) { + if (guest_modes[i].supported && guest_modes[i].enabled) + vm_mode_default = i; + } + TEST_ASSERT(vm_mode_default != NUM_VM_MODES, "No supported mode!"); } #endif } diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 74627514c4d4..7a33965349a7 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -35,8 +35,8 @@ static int skip_atoi(const char **s) ({ \ int __res; \ \ - __res = ((uint64_t) n) % (uint32_t) base; \ - n = ((uint64_t) n) / (uint32_t) base; \ + __res = ((u64)n) % (u32)base; \ + n = ((u64)n) / (u32)base; \ __res; \ }) @@ -119,7 +119,7 @@ int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) { char *str, *end; const char *s; - uint64_t num; + u64 num; int i, base; int len; @@ -216,7 +216,7 @@ repeat: while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); APPEND_BUFFER_SAFE(str, end, - (uint8_t)va_arg(args, int)); + (u8)va_arg(args, int)); while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); continue; @@ -240,7 +240,7 @@ repeat: flags |= SPECIAL | SMALL | ZEROPAD; } str = number(str, end, - (uint64_t)va_arg(args, void *), 16, + (u64)va_arg(args, void *), 16, field_width, precision, flags); continue; @@ -284,15 +284,15 @@ repeat: continue; } if (qualifier == 'l') - num = va_arg(args, uint64_t); + num = va_arg(args, u64); else if (qualifier == 'h') { - num = (uint16_t)va_arg(args, int); + num = (u16)va_arg(args, int); if (flags & SIGN) - num = (int16_t)num; + num = (s16)num; } else if (flags & SIGN) num = va_arg(args, int); else - num = va_arg(args, uint32_t); + num = va_arg(args, u32); str = number(str, end, num, base, field_width, precision, flags); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 480e3a40d197..e08967ef7b7b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,13 +5,14 @@ * Copyright (C) 2018, Google LLC. */ #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #include <assert.h> #include <sched.h> -#include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -19,21 +20,33 @@ #define KVM_UTIL_MIN_PFN 2 -uint32_t guest_random_seed; +u32 guest_random_seed; struct guest_random_state guest_rng; -static uint32_t last_guest_seed; +static u32 last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" : enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -47,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -63,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -79,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -103,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -119,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * @@ -179,7 +165,7 @@ unsigned int kvm_check_cap(long cap) return (unsigned int)ret; } -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) +void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size) { if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); @@ -196,9 +182,14 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } -const char *vm_guest_mode_string(uint32_t i) +const char *vm_guest_mode_string(u32 i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", @@ -210,13 +201,23 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", - [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", + [VM_MODE_P56V57_4K] = "PA-bits:56, VA-bits:57, 4K pages", + [VM_MODE_P56V48_4K] = "PA-bits:56, VA-bits:48, 4K pages", + [VM_MODE_P56V39_4K] = "PA-bits:56, VA-bits:39, 4K pages", + [VM_MODE_P50V57_4K] = "PA-bits:50, VA-bits:57, 4K pages", + [VM_MODE_P50V48_4K] = "PA-bits:50, VA-bits:48, 4K pages", + [VM_MODE_P50V39_4K] = "PA-bits:50, VA-bits:39, 4K pages", + [VM_MODE_P41V57_4K] = "PA-bits:41, VA-bits:57, 4K pages", + [VM_MODE_P41V48_4K] = "PA-bits:41, VA-bits:48, 4K pages", + [VM_MODE_P41V39_4K] = "PA-bits:41, VA-bits:39, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -236,13 +237,23 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, + [VM_MODE_P56V57_4K] = { 56, 57, 0x1000, 12 }, + [VM_MODE_P56V48_4K] = { 56, 48, 0x1000, 12 }, + [VM_MODE_P56V39_4K] = { 56, 39, 0x1000, 12 }, + [VM_MODE_P50V57_4K] = { 50, 57, 0x1000, 12 }, + [VM_MODE_P50V48_4K] = { 50, 48, 0x1000, 12 }, + [VM_MODE_P50V39_4K] = { 50, 39, 0x1000, 12 }, + [VM_MODE_P41V57_4K] = { 41, 57, 0x1000, 12 }, + [VM_MODE_P41V48_4K] = { 41, 48, 0x1000, 12 }, + [VM_MODE_P41V39_4K] = { 41, 39, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -256,7 +267,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * based on the MSB of the VA. On architectures with this behavior * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. */ -__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +__weak void vm_populate_gva_bitmap(struct kvm_vm *vm) { sparsebit_set_num(vm->vpages_valid, 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); @@ -288,59 +299,77 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Setup mode specific traits. */ switch (vm->mode) { case VM_MODE_P52V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P52V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P48V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P48V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); kvm_init_vm_address_properties(vm); - /* - * Ignore KVM support for 5-level paging (vm->va_bits == 57), - * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this mode (48-bit virtual address space). - */ - TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, - "Linear address width (%d bits) not supported", - vm->va_bits); + pr_debug("Guest physical address width detected: %d\n", vm->pa_bits); - vm->pgtable_levels = 4; - vm->va_bits = 48; + pr_debug("Guest virtual address width detected: %d\n", + vm->va_bits); + + if (vm->va_bits == 57) { + vm->mmu.pgtable_levels = 5; + } else { + TEST_ASSERT(vm->va_bits == 48, + "Unexpected guest virtual address width: %d", + vm->va_bits); + vm->mmu.pgtable_levels = 4; + } #else - TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; break; case VM_MODE_P44V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; + break; + case VM_MODE_P56V57_4K: + case VM_MODE_P50V57_4K: + case VM_MODE_P41V57_4K: + vm->mmu.pgtable_levels = 5; + break; + case VM_MODE_P56V48_4K: + case VM_MODE_P50V48_4K: + case VM_MODE_P41V48_4K: + vm->mmu.pgtable_levels = 4; + break; + case VM_MODE_P56V39_4K: + case VM_MODE_P50V39_4K: + case VM_MODE_P41V39_4K: + vm->mmu.pgtable_levels = 3; break; default: TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); @@ -356,7 +385,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - vm_vaddr_populate_bitmap(vm); + vm_populate_gva_bitmap(vm); /* Limit physical addresses to PA-bits. */ vm->max_gfn = vm_compute_max_gfn(vm); @@ -367,12 +396,12 @@ struct kvm_vm *____vm_create(struct vm_shape shape) return vm; } -static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, - uint32_t nr_runnable_vcpus, - uint64_t extra_mem_pages) +static u64 vm_nr_pages_required(enum vm_guest_mode mode, + u32 nr_runnable_vcpus, + u64 extra_mem_pages) { - uint64_t page_size = vm_guest_mode_params[mode].page_size; - uint64_t nr_pages; + u64 page_size = vm_guest_mode_params[mode].page_size; + u64 nr_pages; TEST_ASSERT(nr_runnable_vcpus, "Use vm_create_barebones() for VMs that _never_ have vCPUs"); @@ -406,21 +435,72 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages) +void kvm_set_files_rlimit(u32 nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. + */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + +static bool is_guest_memfd_required(struct vm_shape shape) +{ +#ifdef __x86_64__ + return shape.type == KVM_X86_SNP_VM; +#else + return false; +#endif +} + +struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus, + u64 nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, + u64 nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; - int i; + int i, flags; + + kvm_set_files_rlimit(nr_runnable_vcpus); pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); vm = ____vm_create(shape); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + /* + * Force GUEST_MEMFD for the primary memory region if necessary, e.g. + * for CoCo VMs that require GUEST_MEMFD backed private memory. + */ + flags = 0; + if (is_guest_memfd_required(shape)) + flags |= KVM_MEM_GUEST_MEMFD; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags); for (i = 0; i < NR_MEM_REGIONS; i++) vm->memslots[i] = 0; @@ -442,7 +522,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -466,8 +546,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus, + u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; @@ -480,12 +560,13 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code) { struct kvm_vcpu *vcpus[1]; @@ -533,7 +614,7 @@ void kvm_vm_restart(struct kvm_vm *vmp) } __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) + u32 vcpu_id) { return __vm_vcpu_add(vm, vcpu_id); } @@ -545,20 +626,19 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } -static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +static u32 parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) { - uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + u32 pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), "Not allowed to run on pCPU '%d', check cgroups?", pcpu); @@ -582,7 +662,7 @@ void kvm_print_vcpu_pinning_help(void) " (default: no pinning)\n", name, name); } -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], +void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[], int nr_vcpus) { cpu_set_t allowed_mask; @@ -607,7 +687,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -635,15 +715,15 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], * region exists. */ static struct userspace_mem_region * -userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) +userspace_mem_region_find(struct kvm_vm *vm, u64 start, u64 end) { struct rb_node *node; for (node = vm->regions.gpa_tree.rb_node; node; ) { struct userspace_mem_region *region = container_of(node, struct userspace_mem_region, gpa_node); - uint64_t existing_start = region->region.guest_phys_addr; - uint64_t existing_end = region->region.guest_phys_addr + u64 existing_start = region->region.guest_phys_addr; + u64 existing_end = region->region.guest_phys_addr + region->region.memory_size - 1; if (start <= existing_end && end >= existing_start) return region; @@ -657,6 +737,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + if (stats->fd < 0) + return; + + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } + + kvm_close(stats->fd); + stats->fd = -1; +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -676,19 +770,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) */ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - int ret; - if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); - ret = close(vcpu->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vcpu->fd); + kvm_stats_release(&vcpu->stats); list_del(&vcpu->list); @@ -699,35 +789,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) void kvm_vm_release(struct kvm_vm *vmp) { struct kvm_vcpu *vcpu, *tmp; - int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); - ret = close(vmp->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vmp->fd); + kvm_close(vmp->kvm_fd); - ret = close(vmp->kvm_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); + + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, struct userspace_mem_region *region) { - int ret; - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -748,12 +835,6 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region); @@ -771,7 +852,7 @@ void kvm_vm_free(struct kvm_vm *vmp) int kvm_memfd_alloc(size_t size, bool hugepages) { int memfd_flags = MFD_CLOEXEC; - int fd, r; + int fd; if (hugepages) memfd_flags |= MFD_HUGETLB; @@ -779,11 +860,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages) fd = memfd_create("kvm_selftest", memfd_flags); TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); - r = ftruncate(fd, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); - - r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_ftruncate(fd, size); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); return fd; } @@ -840,8 +918,8 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, } -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) +int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva) { struct kvm_userspace_memory_region region = { .slot = slot, @@ -854,8 +932,8 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); } -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) +void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva) { int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); @@ -867,9 +945,9 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) +int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset) { struct kvm_userspace_memory_region2 region = { .slot = slot, @@ -886,9 +964,9 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); } -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) +void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset) { int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, guest_memfd, guest_memfd_offset); @@ -900,14 +978,14 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) + gpa_t gpa, u32 slot, u64 npages, u32 flags, + int guest_memfd, u64 guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); size_t mem_size = npages * vm->page_size; - size_t alignment; + size_t alignment = 1; TEST_REQUIRE_SET_USER_MEMORY_REGION2(); @@ -915,32 +993,31 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); - TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" - " guest_paddr: 0x%lx vm->page_size: 0x%x", - guest_paddr, vm->page_size); - TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + " gpa: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1) <= vm->max_gfn, "Physical range beyond maximum " "supported physical address,\n" - " guest_paddr: 0x%lx npages: 0x%lx\n" + " gpa: 0x%lx npages: 0x%lx\n" " vm->max_gfn: 0x%lx vm->page_size: 0x%x", - guest_paddr, npages, vm->max_gfn, vm->page_size); + gpa, npages, vm->max_gfn, vm->page_size); /* * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); + vm, gpa, (gpa + npages * vm->page_size) - 1); if (region != NULL) TEST_FAIL("overlapping userspace_mem_region already " "exists\n" - " requested guest_paddr: 0x%lx npages: 0x%lx " - "page_size: 0x%x\n" - " existing guest_paddr: 0x%lx size: 0x%lx", - guest_paddr, npages, vm->page_size, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" + " existing gpa: 0x%lx size: 0x%lx", + gpa, npages, vm->page_size, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); /* Confirm no region with the requested slot already exists. */ hash_for_each_possible(vm->regions.slot_hash, region, slot_node, @@ -950,12 +1027,11 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_FAIL("A mem region with the requested slot " "already exists.\n" - " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" - " existing slot: %u paddr: 0x%lx size: 0x%lx", - slot, guest_paddr, npages, - region->region.slot, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + " requested slot: %u gpa: 0x%lx npages: 0x%lx\n" + " existing slot: %u gpa: 0x%lx size: 0x%lx", + slot, gpa, npages, region->region.slot, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); } /* Allocate and initialize new mem region structure. */ @@ -963,13 +1039,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_ASSERT(region != NULL, "Insufficient Memory"); region->mmap_size = mem_size; -#ifdef __s390x__ - /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ - alignment = 0x100000; -#else - alignment = 1; -#endif - /* * When using THP mmap is not guaranteed to returned a hugepage aligned * address so we have to pad the mmap. Padding is not needed for HugeTLB @@ -979,7 +1048,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -990,12 +1059,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1019,7 +1085,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (flags & KVM_MEM_GUEST_MEMFD) { if (guest_memfd < 0) { - uint32_t guest_memfd_flags = 0; + u32 guest_memfd_flags = 0; TEST_ASSERT(!guest_memfd_offset, "Offset must be zero when creating new guest_memfd"); guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); @@ -1030,8 +1096,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, * needing to track if the fd is owned by the framework * or by the caller. */ - guest_memfd = dup(guest_memfd); - TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + guest_memfd = kvm_dup(guest_memfd); } region->region.guest_memfd = guest_memfd; @@ -1043,20 +1108,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->unused_phy_pages = sparsebit_alloc(); if (vm_arch_has_protected_memory(vm)) region->protected_phy_pages = sparsebit_alloc(); - sparsebit_set_num(region->unused_phy_pages, - guest_paddr >> vm->page_shift, npages); + sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages); region->region.slot = slot; region->region.flags = flags; - region->region.guest_phys_addr = guest_paddr; + region->region.guest_phys_addr = gpa; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", - ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size, + " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d", + ret, errno, slot, flags, gpa, region->region.memory_size, region->region.guest_memfd); /* Add to quick lookup data structures */ @@ -1066,12 +1129,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. */ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1080,10 +1141,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, - uint64_t npages, uint32_t flags) + gpa_t gpa, u32 slot, u64 npages, u32 flags) { - vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); + vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } /* @@ -1102,7 +1162,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * memory slot ID). */ struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot) +memslot2region(struct kvm_vm *vm, u32 memslot) { struct userspace_mem_region *region; @@ -1133,7 +1193,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) * Sets the flags of the memory region specified by the value of slot, * to the values given by flags. */ -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) +void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags) { int ret; struct userspace_mem_region *region; @@ -1149,6 +1209,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } +void vm_mem_region_reload(struct kvm_vm *vm, u32 slot) +{ + struct userspace_mem_region *region = memslot2region(vm, slot); + struct kvm_userspace_memory_region2 tmp = region->region; + + tmp.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); +} + /* * VM Memory Region Move * @@ -1163,7 +1233,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) * * Change the gpa of a memory region. */ -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) +void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa) { struct userspace_mem_region *region; int ret; @@ -1192,7 +1262,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) * * Delete a memory region. */ -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) +void vm_mem_region_delete(struct kvm_vm *vm, u32 slot) { struct userspace_mem_region *region = memslot2region(vm, slot); @@ -1202,18 +1272,18 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, region); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size, bool punch_hole) { const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); struct userspace_mem_region *region; - uint64_t end = base + size; - uint64_t gpa, len; + u64 end = base + size; + gpa_t gpa, len; off_t fd_offset; int ret; for (gpa = base; gpa < end; gpa += len) { - uint64_t offset; + u64 offset; region = userspace_mem_region_find(vm, gpa, gpa); TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, @@ -1221,7 +1291,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, offset = gpa - region->region.guest_phys_addr; fd_offset = region->region.guest_memfd_offset + offset; - len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + len = min_t(u64, end - gpa, region->region.memory_size - offset); ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", @@ -1231,14 +1301,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1246,7 +1316,7 @@ static int vcpu_mmap_sz(void) return ret; } -static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) +static bool vcpu_exists(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1262,7 +1332,7 @@ static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. * No additional vCPU setup is done. Returns the vCPU. */ -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1279,12 +1349,15 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); @@ -1293,33 +1366,18 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) } /* - * VM Virtual Address Unused Gap - * - * Input Args: - * vm - Virtual Machine - * sz - Size (bytes) - * vaddr_min - Minimum Virtual Address - * - * Output Args: None - * - * Return: - * Lowest virtual address at or below vaddr_min, with at least - * sz unused bytes. TEST_ASSERT failure if no area of at least - * size sz is available. - * - * Within the VM specified by vm, locates the lowest starting virtual - * address >= vaddr_min, that has at least sz unallocated bytes. A + * Within the VM specified by @vm, locates the lowest starting guest virtual + * address >= @min_gva, that has at least @sz unallocated bytes. A * TEST_ASSERT failure occurs for invalid input or no area of at least - * sz unallocated bytes >= vaddr_min is available. + * @sz unallocated bytes >= @min_gva is available. */ -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva) { - uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; + u64 pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ - uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; - if ((pgidx_start * vm->page_size) < vaddr_min) + u64 pgidx_start = (min_gva + vm->page_size - 1) >> vm->page_shift; + if ((pgidx_start * vm->page_size) < min_gva) goto no_va_found; /* Loop over section with enough valid virtual page indexes. */ @@ -1356,7 +1414,7 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, } while (pgidx_start != 0); no_va_found: - TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); + TEST_FAIL("No gva of specified pages available, pages: 0x%lx", pages); /* NOT REACHED */ return -1; @@ -1378,148 +1436,91 @@ va_found: return pgidx_start * vm->page_size; } -static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type, - bool protected) +static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type, bool protected) { - uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); + u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); - vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, - vm->memslots[type], protected); + gpa_t gpa = __vm_phy_pages_alloc(vm, pages, + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type], protected); /* * Find an unused range of virtual page addresses of at least * pages in length. */ - vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + gva_t gva_start = vm_unused_gva_gap(vm, sz, min_gva); /* Map the virtual pages. */ - for (vm_vaddr_t vaddr = vaddr_start; pages > 0; - pages--, vaddr += vm->page_size, paddr += vm->page_size) { - - virt_pg_map(vm, vaddr, paddr); + for (gva_t gva = gva_start; pages > 0; + pages--, gva += vm->page_size, gpa += vm->page_size) { - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + virt_pg_map(vm, gva, gpa); } - return vaddr_start; + return gva_start; } -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, - vm_arch_has_protected_memory(vm)); + return ____vm_alloc(vm, sz, min_gva, type, + vm_arch_has_protected_memory(vm)); } -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); + return ____vm_alloc(vm, sz, min_gva, type, false); } /* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. The allocated physical space comes from the TEST_DATA memory region. + * Allocates at least sz bytes within the virtual address space of the VM + * given by @vm. The allocated bytes are mapped to a virtual address >= the + * address given by @min_gva. Note that each allocation uses a a unique set + * of pages, with the minimum real allocation being at least a page. The + * allocated physical space comes from the TEST_DATA memory region. */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva) { - return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); + return __vm_alloc(vm, sz, min_gva, MEM_REGION_TEST_DATA); } -/* - * VM Virtual Address Allocate Pages - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least N system pages worth of bytes within the virtual address - * space of the vm. - */ -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) +gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages) { - return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); + return vm_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) { - return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); + return __vm_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); } -/* - * VM Virtual Address Allocate Page - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least one system page worth of bytes within the virtual address - * space of the vm. - */ -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) +gva_t vm_alloc_page(struct kvm_vm *vm) { - return vm_vaddr_alloc_pages(vm, 1); + return vm_alloc_pages(vm, 1); } /* - * Map a range of VM virtual address to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * vaddr - Virtuall address to map - * paddr - VM Physical Address - * npages - The number of pages to map + * Map a range of VM virtual address to the VM's physical address. * - * Output Args: None - * - * Return: None - * - * Within the VM given by @vm, creates a virtual translation for - * @npages starting at @vaddr to the page range starting at @paddr. + * Within the VM given by @vm, creates a virtual translation for @npages + * starting at @gva to the page range starting at @gpa. */ -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages) +void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages) { size_t page_size = vm->page_size; size_t size = npages * page_size; - TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + TEST_ASSERT(gva + size > gva, "Vaddr overflow"); + TEST_ASSERT(gpa + size > gpa, "Paddr overflow"); while (npages--) { - virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + virt_pg_map(vm, gva, gpa); - vaddr += page_size; - paddr += page_size; + gva += page_size; + gpa += page_size; } } @@ -1540,7 +1541,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * address providing the memory to the vm physical address is returned. * A TEST_ASSERT failure occurs if no region containing gpa exists. */ -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; @@ -1573,7 +1574,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) * VM physical address is returned. A TEST_ASSERT failure occurs if no * region containing hva exists. */ -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) +gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva) { struct rb_node *node; @@ -1584,7 +1585,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) if (hva >= region->host_mem) { if (hva <= (region->host_mem + region->region.memory_size - 1)) - return (vm_paddr_t)((uintptr_t) + return (gpa_t)((uintptr_t) region->region.guest_phys_addr + (hva - (uintptr_t)region->host_mem)); @@ -1616,7 +1617,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) * memory without mapping said memory in the guest's address space. And, for * userfaultfd-based demand paging, to do so without triggering userfaults. */ -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; uintptr_t offset; @@ -1635,7 +1636,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } @@ -1648,7 +1660,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } @@ -1698,8 +1711,8 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = getpagesize(); - uint32_t size = vcpu->vm->dirty_ring_size; + u32 page_size = getpagesize(); + u32 size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); @@ -1714,9 +1727,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); @@ -1729,7 +1741,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) * Device Ioctl */ -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr) { struct kvm_device_attr attribute = { .group = group, @@ -1740,7 +1752,7 @@ int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); } -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_test_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1750,7 +1762,7 @@ int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); } -int __kvm_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1764,7 +1776,7 @@ int __kvm_create_device(struct kvm_vm *vm, uint64_t type) return err ? : create_dev.fd; } -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1776,7 +1788,7 @@ int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1792,7 +1804,7 @@ int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) * IRQ related functions. */ -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { struct kvm_irq_level irq_level = { .irq = irq, @@ -1802,7 +1814,7 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); } -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { int ret = _kvm_irq_line(vm, irq, level); @@ -1824,7 +1836,7 @@ struct kvm_irq_routing *kvm_gsi_routing_create(void) } void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin) + u32 gsi, u32 pin) { int i; @@ -1874,7 +1886,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) * Dumps the current state of the VM given by vm, to the FILE stream * given by stream. */ -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { int ctr; struct userspace_mem_region *region; @@ -1887,8 +1899,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " "host_virt: %p\n", indent + 2, "", - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size, region->host_mem); fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); sparsebit_dump(stream, region->unused_phy_pages, 0); @@ -1900,8 +1912,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); sparsebit_dump(stream, vm->vpages_mapped, indent + 2); fprintf(stream, "%*spgd_created: %u\n", indent, "", - vm->pgd_created); - if (vm->pgd_created) { + vm->mmu.pgd_created); + if (vm->mmu.pgd_created) { fprintf(stream, "%*sVirtual Translation Tables:\n", indent + 2, ""); virt_dump(stream, vm, indent + 4); @@ -1957,9 +1969,9 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), + KVM_EXIT_STRING(ARM_SEA), }; /* @@ -1995,7 +2007,7 @@ const char *exit_reason_str(unsigned int exit_reason) * Input Args: * vm - Virtual Machine * num - number of pages - * paddr_min - Physical address minimum + * min_gpa - Physical address minimum * memslot - Memory region to allocate page from * protected - True if the pages will be used as protected/private memory * @@ -2005,29 +2017,29 @@ const char *exit_reason_str(unsigned int exit_reason) * Starting physical address * * Within the VM specified by vm, locates a range of available physical - * pages at or above paddr_min. If found, the pages are marked as in use + * pages at or above min_gpa. If found, the pages are marked as in use * and their base address is returned. A TEST_ASSERT failure occurs if - * not enough pages are available at or above paddr_min. + * not enough pages are available at or above min_gpa. */ -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected) +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t min_gpa, u32 memslot, + bool protected) { struct userspace_mem_region *region; sparsebit_idx_t pg, base; TEST_ASSERT(num > 0, "Must allocate at least one page"); - TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " + TEST_ASSERT((min_gpa % vm->page_size) == 0, "Min physical address " "not divisible by page size.\n" - " paddr_min: 0x%lx page_size: 0x%x", - paddr_min, vm->page_size); + " min_gpa: 0x%lx page_size: 0x%x", + min_gpa, vm->page_size); region = memslot2region(vm, memslot); TEST_ASSERT(!protected || region->protected_phy_pages, "Region doesn't support protected memory"); - base = pg = paddr_min >> vm->page_shift; + base = pg = min_gpa >> vm->page_shift; do { for (; pg < base + num; ++pg) { if (!sparsebit_is_set(region->unused_phy_pages, pg)) { @@ -2039,8 +2051,8 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, if (pg == 0) { fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", - paddr_min, vm->page_size, memslot); + "min_gpa: 0x%lx page_size: 0x%x memslot: %u\n", + min_gpa, vm->page_size, memslot); fputs("---- vm dump ----\n", stderr); vm_dump(stderr, vm, 2); abort(); @@ -2055,13 +2067,12 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, return base * vm->page_size; } -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot) +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot) { - return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); + return vm_phy_pages_alloc(vm, 1, min_gpa, memslot); } -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) +gpa_t vm_alloc_page_table(struct kvm_vm *vm) { return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); @@ -2079,7 +2090,7 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) * Return: * Equivalent host virtual address */ -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) +void *addr_gva2hva(struct kvm_vm *vm, gva_t gva) { return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } @@ -2177,7 +2188,7 @@ struct kvm_stats_desc *read_stats_descriptors(int stats_fd, * Read the data values of a specified stat from the binary stats interface. */ void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, + struct kvm_stats_desc *desc, u64 *data, size_t max_elements) { size_t nr_elements = min_t(ssize_t, desc->size, max_elements); @@ -2197,49 +2208,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + u64 *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + TEST_FAIL("Unable to find stat '%s'", name); +} + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ +} + +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } @@ -2247,18 +2251,42 @@ __weak void kvm_selftest_arch_init(void) { } +static void report_unexpected_signal(int signum) +{ +#define KVM_CASE_SIGNUM(sig) \ + case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum) + + switch (signum) { + KVM_CASE_SIGNUM(SIGBUS); + KVM_CASE_SIGNUM(SIGSEGV); + KVM_CASE_SIGNUM(SIGILL); + KVM_CASE_SIGNUM(SIGFPE); + default: + TEST_FAIL("Unexpected signal %d\n", signum); + } +} + void __attribute((constructor)) kvm_selftest_init(void) { + struct sigaction sig_sa = { + .sa_handler = report_unexpected_signal, + }; + /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + sigaction(SIGBUS, &sig_sa, NULL); + sigaction(SIGSEGV, &sig_sa, NULL); + sigaction(SIGILL, &sig_sa, NULL); + sigaction(SIGFPE, &sig_sa, NULL); + guest_random_seed = last_guest_seed = random(); pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa) { sparsebit_idx_t pg = 0; struct userspace_mem_region *region; @@ -2266,9 +2294,14 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) if (!vm_arch_has_protected_memory(vm)) return false; - region = userspace_mem_region_find(vm, paddr, paddr); - TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr); + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region, "No vm physical memory at 0x%lx", gpa); - pg = paddr >> vm->page_shift; + pg = gpa >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..3f1e4b67c5ae --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_PRMD + st.d t0, sp, PRMD_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + ld.d t0, sp, PC_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_ERA + ld.d t0, sp, PRMD_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_PRMD + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..64d91fb76522 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include <asm/kvm.h> +#include "kvm_util.h" +#include "pmu.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static gpa_t invalid_pgtable[4]; +static gva_t exception_handlers; + +static u64 virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) +{ + unsigned int shift; + u64 mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static u64 pte_addr(struct kvm_vm *vm, u64 entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static u64 ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, gpa_t table, gpa_t child) +{ + u64 *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + gpa_t child, table; + + if (vm->mmu.pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->mmu.pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->mmu.pgd = table; + vm->mmu.pgd_created = true; +} + +static int virt_pte_none(u64 *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static u64 *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) +{ + int level; + u64 *ptep; + gpa_t child; + + if (!vm->mmu.pgd_created) + goto unmapped_gva; + + child = vm->mmu.pgd; + level = vm->mmu.pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +{ + u64 *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address gva: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) +{ + u32 prot_bits; + u64 *ptep; + + TEST_ASSERT((gva % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, gva, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, gpa | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) +{ + u64 pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) +{ + int level; + + if (!vm->mmu.pgd_created) + return; + + level = vm->mmu.pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->mmu.pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + int vector; + unsigned long pc, estat, badv; + struct handlers *handlers; + + handlers = (struct handlers *)exception_handlers; + vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + if (handlers && handlers->exception_handlers[vector]) + return handlers->exception_handlers[vector](regs); + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + void *addr; + + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + + addr = addr_gva2hva(vm, vm->handlers); + memset(addr, 0, vm->page_size); + exception_handlers = vm->handlers; + sync_global_to_guest(vm, exception_handlers); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector] = handler; +} + +u32 guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, ®s); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, u64); + va_end(ap); + + vcpu_regs_set(vcpu, ®s); +} + +static void loongarch_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + __vcpu_set_reg(vcpu, id, val); +} + +static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + u64 cfgid; + + cfgid = KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, cfgid, val); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, u64 id, void *addr) +{ + u64 csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + u64 csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned int cfg; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + cfg = read_cpucfg(LOONGARCH_CPUCFG6); + loongarch_set_cpucfg(vcpu, LOONGARCH_CPUCFG6, cfg); + + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + /* time count start from 0 */ + val = 0; + loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val); + + width = vm->page_shift - 3; + + switch (vm->mmu.pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall throuth */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_alloc(vm, vm->page_size, LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) +{ + size_t stack_size; + u64 stack_gva; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_gva = __vm_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_gva != 0, "No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.gpr[3] = stack_gva + stack_size; + vcpu_regs_set(vcpu, ®s); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, ®s); + regs.pc = (u64)guest_code; + vcpu_regs_set(vcpu, ®s); +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c new file mode 100644 index 000000000000..cd49a3440ead --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + */ +#include "kvm_util.h" + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +gva_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) +{ + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), + "Unexpected ucall exit mmio address access"); + + return (void *)(*((u64 *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c new file mode 100644 index 000000000000..46a14fd63d9e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + */ + +#include <time.h> + +#include "lru_gen_util.h" + +/* + * Tracks state while we parse memcg lru_gen stats. The file we're parsing is + * structured like this (some extra whitespace elided): + * + * memcg (id) (path) + * node (id) + * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages) + */ +struct memcg_stats_parse_context { + bool consumed; /* Whether or not this line was consumed */ + /* Next parse handler to invoke */ + void (*next_handler)(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + int current_node_idx; /* Current index in nodes array */ + const char *name; /* The name of the memcg we're looking for */ +}; + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + +struct split_iterator { + char *str; + char *save; +}; + +static char *split_next(struct split_iterator *it) +{ + char *ret = strtok_r(it->str, " \t\n\r", &it->save); + + it->str = NULL; + return ret; +} + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *memcg_id = split_next(&it); + char *memcg_name = split_next(&it); + char *end; + + ctx->consumed = true; + + if (!prefix || strcmp("memcg", prefix)) + return; /* Not a memcg line (maybe empty), skip */ + + TEST_ASSERT(memcg_id && memcg_name, + "malformed memcg line; no memcg id or memcg_name"); + + if (strcmp(memcg_name + 1, ctx->name)) + return; /* Wrong memcg, skip */ + + /* Found it! */ + + stats->memcg_id = strtoul(memcg_id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id); + if (!stats->memcg_id) + return; /* Removed memcg? */ + + ctx->next_handler = memcg_stats_handle_in_memcg; +} + +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *id = split_next(&it); + long found_node_id; + char *end; + + ctx->consumed = true; + ctx->current_node_idx = -1; + + if (!prefix) + return; /* Skip empty lines */ + + if (!strcmp("memcg", prefix)) { + /* Memcg done, found next one; stop. */ + ctx->next_handler = NULL; + return; + } else if (strcmp("node", prefix)) + TEST_ASSERT(false, "found malformed line after 'memcg ...'," + "token: '%s'", prefix); + + /* At this point we know we have a node line. Parse the ID. */ + + TEST_ASSERT(id, "malformed node line; no node id"); + + found_node_id = strtol(id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed node id '%s'", id); + + ctx->current_node_idx = stats->nr_nodes++; + TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES, + "memcg has stats for too many nodes, max is %d", + MAX_NR_NODES); + stats->nodes[ctx->current_node_idx].node = found_node_id; + + ctx->next_handler = memcg_stats_handle_in_node; +} + +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + char *my_line = strdup(line); + struct split_iterator it = { .str = my_line }; + char *gen, *age, *nr_anon, *nr_file; + struct node_stats *node_stats; + struct generation_stats *gen_stats; + char *end; + + TEST_ASSERT(it.str, "failed to copy input line"); + + gen = split_next(&it); + + if (!gen) + goto out_consume; /* Skip empty lines */ + + if (!strcmp("memcg", gen) || !strcmp("node", gen)) { + /* + * Reached next memcg or node section. Don't consume, let the + * other handler deal with this. + */ + ctx->next_handler = memcg_stats_handle_in_memcg; + goto out; + } + + node_stats = &stats->nodes[ctx->current_node_idx]; + TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS, + "found too many generation lines; max is %d", + MAX_NR_GENS); + gen_stats = &node_stats->gens[node_stats->nr_gens++]; + + age = split_next(&it); + nr_anon = split_next(&it); + nr_file = split_next(&it); + + TEST_ASSERT(age && nr_anon && nr_file, + "malformed generation line; not enough tokens"); + + gen_stats->gen = (int)strtol(gen, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen); + + gen_stats->age_ms = strtol(age, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age); + + gen_stats->nr_anon = strtol(nr_anon, &end, 10); + TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'", + nr_anon); + + gen_stats->nr_file = strtol(nr_file, &end, 10); + TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file); + +out_consume: + ctx->consumed = true; +out: + free(my_line); +} + +static void print_memcg_stats(const struct memcg_stats *stats, const char *name) +{ + int node, gen; + + pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id); + for (node = 0; node < stats->nr_nodes; ++node) { + pr_debug("\tnode %d\n", stats->nodes[node].node); + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + const struct generation_stats *gstats = + &stats->nodes[node].gens[gen]; + + pr_debug("\t\tgen %d\tage_ms %ld" + "\tnr_anon %ld\tnr_file %ld\n", + gstats->gen, gstats->age_ms, gstats->nr_anon, + gstats->nr_file); + } + } +} + +/* Re-read lru_gen debugfs information for @memcg into @stats. */ +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg) +{ + FILE *f; + ssize_t read = 0; + char *line = NULL; + size_t bufsz; + struct memcg_stats_parse_context ctx = { + .next_handler = memcg_stats_handle_searching, + .name = memcg, + }; + + memset(stats, 0, sizeof(struct memcg_stats)); + + f = fopen(LRU_GEN_DEBUGFS, "r"); + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + + while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) { + ctx.consumed = false; + + do { + ctx.next_handler(stats, &ctx, line); + if (!ctx.next_handler) + break; + } while (!ctx.consumed); + } + + if (read < 0 && !feof(f)) + TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS); + + TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n" + "Did the memcg get created in the proper mount?", + memcg); + if (line) + free(line); + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); + + print_memcg_stats(stats, memcg); +} + +/* + * Find all pages tracked by lru_gen for this memcg in generation @target_gen. + * + * If @target_gen is negative, look for all generations. + */ +long lru_gen_sum_memcg_stats_for_gen(int target_gen, + const struct memcg_stats *stats) +{ + int node, gen; + long total_nr = 0; + + for (node = 0; node < stats->nr_nodes; ++node) { + const struct node_stats *node_stats = &stats->nodes[node]; + + for (gen = 0; gen < node_stats->nr_gens; ++gen) { + const struct generation_stats *gen_stats = + &node_stats->gens[gen]; + + if (target_gen >= 0 && gen_stats->gen != target_gen) + continue; + + total_nr += gen_stats->nr_anon + gen_stats->nr_file; + } + } + + return total_nr; +} + +/* Find all pages tracked by lru_gen for this memcg. */ +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats) +{ + return lru_gen_sum_memcg_stats_for_gen(-1, stats); +} + +/* + * If lru_gen aging should force page table scanning. + * + * If you want to set this to false, you will need to do eviction + * before doing extra aging passes. + */ +static const bool force_scan = true; + +static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen) +{ + FILE *f = fopen(LRU_GEN_DEBUGFS, "w"); + char *command; + size_t sz; + + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + sz = asprintf(&command, "+ %lu %d %d 1 %d\n", + memcg_id, node_id, max_gen, force_scan); + TEST_ASSERT(sz > 0, "creating aging command failed"); + + pr_debug("Running aging command: %s", command); + if (fwrite(command, sizeof(char), sz, f) < sz) { + TEST_ASSERT(false, "writing aging command %s to %s failed", + command, LRU_GEN_DEBUGFS); + } + + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); +} + +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg) +{ + int node, gen; + + pr_debug("lru_gen: invoking aging...\n"); + + /* Must read memcg stats to construct the proper aging command. */ + lru_gen_read_memcg_stats(stats, memcg); + + for (node = 0; node < stats->nr_nodes; ++node) { + int max_gen = 0; + + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + int this_gen = stats->nodes[node].gens[gen].gen; + + max_gen = max_gen > this_gen ? max_gen : this_gen; + } + + run_aging_impl(stats->memcg_id, stats->nodes[node].node, + max_gen); + } + + /* Re-read so callers get updated information */ + lru_gen_read_memcg_stats(stats, memcg); +} + +/* + * Find which generation contains at least @pages pages, assuming that + * such a generation exists. + */ +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long pages) +{ + int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1; + + for (node = 0; node < stats->nr_nodes; ++node) + for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens; + ++gen_idx) { + gen = stats->nodes[node].gens[gen_idx].gen; + max_gen = gen > max_gen ? gen : max_gen; + min_gen = gen < min_gen ? gen : min_gen; + } + + for (gen = min_gen; gen <= max_gen; ++gen) + /* See if this generation has enough pages. */ + if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages) + return gen; + + return -1; +} + +bool lru_gen_usable(void) +{ + long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK; + int lru_gen_fd, lru_gen_debug_fd; + char mglru_feature_str[8] = {}; + long mglru_features; + + lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY); + if (lru_gen_fd < 0) { + puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH); + return false; + } + if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) { + puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH); + close(lru_gen_fd); + return false; + } + close(lru_gen_fd); + + mglru_features = strtol(mglru_feature_str, NULL, 16); + if ((mglru_features & required_features) != required_features) { + printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n", + mglru_features, required_features); + printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n", + required_features); + return false; + } + + lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR); + __TEST_REQUIRE(lru_gen_debug_fd >= 0, + "lru_gen: Could not open " LRU_GEN_DEBUGFS ", " + "but lru_gen is enabled, so cannot use page_idle."); + close(lru_gen_debug_fd); + return true; +} diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..6dcd15910a06 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -16,7 +16,7 @@ struct memstress_args memstress_args; * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; struct vcpu_thread { /* The index of the vCPU. */ @@ -44,15 +44,15 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void memstress_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(u32 vcpu_idx) { struct memstress_args *args = &memstress_args; struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; - uint64_t gva; - uint64_t pages; - uint64_t addr; - uint64_t page; + gva_t gva; + u64 pages; + u64 addr; + u64 page; int i; rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); @@ -76,9 +76,9 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); if (__guest_random_bool(&rand_state, args->write_percent)) - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; else - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); } GUEST_SYNC(1); @@ -87,7 +87,7 @@ void memstress_guest_code(uint32_t vcpu_idx) void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], - uint64_t vcpu_memory_bytes, + u64 vcpu_memory_bytes, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; @@ -122,15 +122,15 @@ void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, } struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, + u64 vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; struct kvm_vm *vm; - uint64_t guest_num_pages, slot0_pages = 0; - uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); - uint64_t region_end_gfn; + u64 guest_num_pages, slot0_pages = 0; + u64 backing_src_pagesz = get_backing_src_pagesz(backing_src); + u64 region_end_gfn; int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); @@ -196,18 +196,14 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; args->gpa = align_down(args->gpa, backing_src_pagesz); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - args->gpa = align_down(args->gpa, 1 << 20); -#endif args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { - uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; + u64 region_pages = guest_num_pages / slots; + gpa_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, MEMSTRESS_MEM_SLOT_INDEX + i, @@ -236,7 +232,7 @@ void memstress_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) +void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent) { memstress_args.write_percent = write_percent; sync_global_to_guest(vm, memstress_args.write_percent); @@ -248,7 +244,7 @@ void memstress_set_random_access(struct kvm_vm *vm, bool random_access) sync_global_to_guest(vm, memstress_args.random_access); } -uint64_t __weak memstress_nested_pages(int nr_vcpus) +u64 __weak memstress_nested_pages(int nr_vcpus) { return 0; } @@ -265,7 +261,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -353,7 +349,7 @@ void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int sl } void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot) + int slots, u64 pages_per_slot) { int i; @@ -364,7 +360,7 @@ void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], } } -unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot) +unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot) { unsigned long **bitmaps; int i; diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S index aa0abd3f35bb..b787b982e922 100644 --- a/tools/testing/selftests/kvm/lib/riscv/handlers.S +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ -10,85 +10,88 @@ #include <asm/csr.h> .macro save_context - addi sp, sp, (-8*34) - sd x1, 0(sp) - sd x2, 8(sp) - sd x3, 16(sp) - sd x4, 24(sp) - sd x5, 32(sp) - sd x6, 40(sp) - sd x7, 48(sp) - sd x8, 56(sp) - sd x9, 64(sp) - sd x10, 72(sp) - sd x11, 80(sp) - sd x12, 88(sp) - sd x13, 96(sp) - sd x14, 104(sp) - sd x15, 112(sp) - sd x16, 120(sp) - sd x17, 128(sp) - sd x18, 136(sp) - sd x19, 144(sp) - sd x20, 152(sp) - sd x21, 160(sp) - sd x22, 168(sp) - sd x23, 176(sp) - sd x24, 184(sp) - sd x25, 192(sp) - sd x26, 200(sp) - sd x27, 208(sp) - sd x28, 216(sp) - sd x29, 224(sp) - sd x30, 232(sp) - sd x31, 240(sp) + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) csrr s0, CSR_SEPC csrr s1, CSR_SSTATUS - csrr s2, CSR_SCAUSE - sd s0, 248(sp) + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) sd s1, 256(sp) sd s2, 264(sp) + sd s3, 272(sp) .endm .macro restore_context + ld s3, 272(sp) ld s2, 264(sp) ld s1, 256(sp) - ld s0, 248(sp) - csrw CSR_SCAUSE, s2 + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 csrw CSR_SSTATUS, s1 csrw CSR_SEPC, s0 - ld x31, 240(sp) - ld x30, 232(sp) - ld x29, 224(sp) - ld x28, 216(sp) - ld x27, 208(sp) - ld x26, 200(sp) - ld x25, 192(sp) - ld x24, 184(sp) - ld x23, 176(sp) - ld x22, 168(sp) - ld x21, 160(sp) - ld x20, 152(sp) - ld x19, 144(sp) - ld x18, 136(sp) - ld x17, 128(sp) - ld x16, 120(sp) - ld x15, 112(sp) - ld x14, 104(sp) - ld x13, 96(sp) - ld x12, 88(sp) - ld x11, 80(sp) - ld x10, 72(sp) - ld x9, 64(sp) - ld x8, 56(sp) - ld x7, 48(sp) - ld x6, 40(sp) - ld x5, 32(sp) - ld x4, 24(sp) - ld x3, 16(sp) - ld x2, 8(sp) - ld x1, 0(sp) - addi sp, sp, (8*34) + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) .endm .balign 4 diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 6ae47b3d6b25..ded5429f3448 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -8,15 +8,16 @@ #include <linux/compiler.h> #include <assert.h> +#include "guest_modes.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext) { unsigned long value = 0; int ret; @@ -26,41 +27,36 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) return !ret && !!value; } -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) -{ - return (v + vm->page_size) & ~(vm->page_size - 1); -} - -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +static u64 pte_addr(struct kvm_vm *vm, u64 entry) { return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << PGTBL_PAGE_SIZE_SHIFT; } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static u64 ptrs_per_pte(struct kvm_vm *vm) { - return PGTBL_PAGE_SIZE / sizeof(uint64_t); + return PGTBL_PAGE_SIZE / sizeof(u64); } -static uint64_t pte_index_mask[] = { +static u64 pte_index_mask[] = { PGTBL_L0_INDEX_MASK, PGTBL_L1_INDEX_MASK, PGTBL_L2_INDEX_MASK, PGTBL_L3_INDEX_MASK, }; -static uint32_t pte_index_shift[] = { +static u32 pte_index_shift[] = { PGTBL_L0_INDEX_SHIFT, PGTBL_L1_INDEX_SHIFT, PGTBL_L2_INDEX_SHIFT, PGTBL_L3_INDEX_SHIFT, }; -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +static u64 pte_index(struct kvm_vm *vm, gva_t gva, int level) { TEST_ASSERT(level > -1, "Negative page table level (%d) not possible", level); - TEST_ASSERT(level < vm->pgtable_levels, + TEST_ASSERT(level < vm->mmu.pgtable_levels, "Invalid page table level (%d)", level); return (gva & pte_index_mask[level]) >> pte_index_shift[level]; @@ -68,37 +64,36 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - uint64_t *ptep, next_ppn; - int level = vm->pgtable_levels - 1; + u64 *ptep, next_ppn; + int level = vm->mmu.pgtable_levels - 1; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!*ptep) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | @@ -108,7 +103,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) while (level > -1) { ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + - pte_index(vm, vaddr, level) * 8; + pte_index(vm, gva, level) * 8; if (!*ptep && level > 0) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; @@ -118,20 +113,20 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) level--; } - paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; - *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | + gpa = gpa >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (gpa << PGTBL_PTE_ADDR_SHIFT) | PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep; - int level = vm->pgtable_levels - 1; + u64 *ptep; + int level = vm->mmu.pgtable_levels - 1; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!ptep) goto unmapped_gva; level--; @@ -152,12 +147,12 @@ unmapped_gva: exit(1); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 page, int level) { #ifdef DEBUG static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level < 0) return; @@ -174,15 +169,16 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - int level = vm->pgtable_levels - 1; - uint64_t pgd, *ptep; + struct kvm_mmu *mmu = &vm->mmu; + int level = mmu->pgtable_levels - 1; + u64 pgd, *ptep; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { + for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -197,63 +193,82 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) { struct kvm_vm *vm = vcpu->vm; unsigned long satp; + unsigned long satp_mode; + unsigned long max_satp_mode; /* * The RISC-V Sv48 MMU mode supports 56-bit physical address * for 48-bit virtual address with 4KB last level page size. */ switch (vm->mode) { - case VM_MODE_P52V48_4K: - case VM_MODE_P48V48_4K: - case VM_MODE_P40V48_4K: + case VM_MODE_P56V57_4K: + case VM_MODE_P50V57_4K: + case VM_MODE_P41V57_4K: + satp_mode = SATP_MODE_57; + break; + case VM_MODE_P56V48_4K: + case VM_MODE_P50V48_4K: + case VM_MODE_P41V48_4K: + satp_mode = SATP_MODE_48; + break; + case VM_MODE_P56V39_4K: + case VM_MODE_P50V39_4K: + case VM_MODE_P41V39_4K: + satp_mode = SATP_MODE_39; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; - satp |= SATP_MODE_48; + max_satp_mode = vcpu_get_reg(vcpu, RISCV_CONFIG_REG(satp_mode)); + + if ((satp_mode >> SATP_MODE_SHIFT) > max_satp_mode) + TEST_FAIL("Unable to set satp mode 0x%lx, max mode 0x%lx\n", + satp_mode >> SATP_MODE_SHIFT, max_satp_mode); + + satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; + satp |= satp_mode; vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); @@ -295,20 +310,20 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { int r; size_t stack_size; - unsigned long stack_vaddr; + unsigned long stack_gva; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); @@ -328,7 +343,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); /* Setup stack pointer and program counter of guest */ - vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_gva + stack_size); /* Setup sscratch for guest_get_vcpuid() */ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); @@ -342,7 +357,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; - uint64_t id = RISCV_CORE_REG(regs.a0); + u64 id = RISCV_CORE_REG(regs.a0); int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" @@ -377,7 +392,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) id = RISCV_CORE_REG(regs.a7); break; } - vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); + vcpu_set_reg(vcpu, id, va_arg(ap, u64)); } va_end(ap); @@ -402,7 +417,7 @@ struct handlers { exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; }; -void route_exception(struct ex_regs *regs) +void route_exception(struct pt_regs *regs) { struct handlers *handlers = (struct handlers *)exception_handlers; int vector = 0, ec; @@ -433,10 +448,10 @@ void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) void vm_init_vector_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) @@ -454,7 +469,7 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle handlers->exception_handlers[1][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return csr_read(CSR_SSCRATCH); } @@ -515,3 +530,43 @@ unsigned long get_host_sbi_spec_version(void) return ret.value; } + +void kvm_selftest_arch_init(void) +{ + /* + * riscv64 doesn't have a true default mode, so start by detecting the + * supported vm mode. + */ + guest_modes_append_default(); +} + +unsigned long riscv64_get_satp_mode(void) +{ + int kvm_fd, vm_fd, vcpu_fd, err; + u64 val; + struct kvm_one_reg reg = { + .id = RISCV_CONFIG_REG(satp_mode), + .addr = (u64)&val, + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL); + TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd)); + + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd)); + + err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); + + return val; +} + +bool kvm_arch_has_default_irqchip(void) +{ + return kvm_check_cap(KVM_CAP_IRQCHIP); +} diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..f5480473f192 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c @@ -13,7 +13,7 @@ static void guest_code(void) { - uint64_t diag318_info = 0x12345678; + u64 diag318_info = 0x12345678; asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); } @@ -23,13 +23,13 @@ static void guest_code(void) * we create an ad-hoc VM here to handle the instruction then extract the * necessary data. It is up to the caller to decide what to do with that data. */ -static uint64_t diag318_handler(void) +static u64 diag318_handler(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; - uint64_t reg; - uint64_t diag318_info; + u64 reg; + u64 diag318_info; vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_run(vcpu); @@ -51,9 +51,9 @@ static uint64_t diag318_handler(void) return diag318_info; } -uint64_t get_diag318_info(void) +u64 get_diag318_info(void) { - static uint64_t diag318_info; + static u64 diag318_info; static bool printed_skip; /* diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c index d540812d911a..9a778054f07f 100644 --- a/tools/testing/selftests/kvm/lib/s390x/facility.c +++ b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -10,5 +10,5 @@ #include "facility.h" -uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +u64 stfl_doublewords[NB_STFL_DOUBLEWORDS]; bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..a9adb3782b35 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -12,21 +12,21 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { - vm_paddr_t paddr; + gpa_t gpa; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + gpa = vm_phy_pages_alloc(vm, PAGES_PER_REGION, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); - memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + memset(addr_gpa2hva(vm, gpa), 0xff, PAGES_PER_REGION * vm->page_size); - vm->pgd = paddr; - vm->pgd_created = true; + vm->mmu.pgd = gpa; + vm->mmu.pgd_created = true; } /* @@ -34,9 +34,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) * a page table (ri == 4). Returns a suitable region/segment table entry * which points to the freshly allocated pages. */ -static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) +static u64 virt_alloc_region(struct kvm_vm *vm, int ri) { - uint64_t taddr; + u64 taddr; taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); @@ -47,30 +47,28 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT((gva % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - gva, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (gva >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - gva); + "Virtual address not on page boundary,\n" + " gva: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", gva, vm->max_gfn, vm->page_size); /* Walk through region and segment tables */ - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; if (entry[idx] & REGION_ENTRY_INVALID) @@ -86,15 +84,15 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) entry[idx] = gpa; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), @@ -111,10 +109,10 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return (entry[idx] & ~0xffful) + (gva & 0xffful); } -static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t ptea_start) +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 ptea_start) { - uint64_t *pte, ptea; + u64 *pte, ptea; for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { pte = addr_gpa2hva(vm, ptea); @@ -125,10 +123,10 @@ static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t reg_tab_addr) +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 reg_tab_addr) { - uint64_t addr, *entry; + u64 addr, *entry; for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { entry = addr_gpa2hva(vm, addr); @@ -147,12 +145,12 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - virt_dump_region(stream, vm, indent, vm->pgd); + virt_dump_region(stream, vm, indent, vm->mmu.pgd); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -160,10 +158,10 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu->run->psw_addr = (uintptr_t)guest_code; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); - uint64_t stack_vaddr; + u64 stack_gva; struct kvm_regs regs; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; @@ -171,20 +169,19 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); /* Setup guest registers */ vcpu_regs_get(vcpu, ®s); - regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + regs.gprs[15] = stack_gva + (DEFAULT_STACK_PGS * getpagesize()) - 160; vcpu_regs_set(vcpu, ®s); vcpu_sregs_get(vcpu, &sregs); sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ - sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */ vcpu_sregs_set(vcpu, &sregs); vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ @@ -206,13 +203,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); for (i = 0; i < num; i++) - regs.gprs[i + 2] = va_arg(ap, uint64_t); + regs.gprs[i + 2] = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); @@ -221,3 +218,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c index cca98734653d..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..4d845000de15 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -76,11 +76,11 @@ * the use of a binary-search tree, where each node contains at least * the following members: * - * typedef uint64_t sparsebit_idx_t; - * typedef uint64_t sparsebit_num_t; + * typedef u64 sparsebit_idx_t; + * typedef u64 sparsebit_num_t; * * sparsebit_idx_t idx; - * uint32_t mask; + * u32 mask; * sparsebit_num_t num_after; * * The idx member contains the bit index of the first bit described by this @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -162,7 +162,7 @@ #define DUMP_LINE_MAX 100 /* Does not include indent amount */ -typedef uint32_t mask_t; +typedef u32 mask_t; #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) struct node { @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ @@ -2056,9 +2056,9 @@ unsigned char get8(void) return ch; } -uint64_t get64(void) +u64 get64(void) { - uint64_t x; + u64 x; x = get8(); x = (x << 8) | get8(); @@ -2074,9 +2074,9 @@ int main(void) { s = sparsebit_alloc(); for (;;) { - uint8_t op = get8() & 0xf; - uint64_t first = get64(); - uint64_t last = get64(); + u8 op = get8() & 0xf; + u64 first = get64(); + u64 last = get64(); operate(op, first, last); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8ed0b74ae837..bab1bd2b775b 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,20 +18,27 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. */ -struct guest_random_state new_guest_random_state(uint32_t seed) +struct guest_random_state new_guest_random_state(u32 seed) { struct guest_random_state s = {.seed = seed}; return s; } -uint32_t guest_random_u32(struct guest_random_state *state) +u32 guest_random_u32(struct guest_random_state *state) { - state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + state->seed = (u64)state->seed * 48271 % ((u32)(1 << 31) - 1); return state->seed; } @@ -76,12 +83,12 @@ size_t parse_size(const char *size) return base << shift; } -int64_t timespec_to_ns(struct timespec ts) +s64 timespec_to_ns(struct timespec ts) { - return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec; + return (s64)ts.tv_nsec + 1000000000LL * (s64)ts.tv_sec; } -struct timespec timespec_add_ns(struct timespec ts, int64_t ns) +struct timespec timespec_add_ns(struct timespec ts, s64 ns) { struct timespec res; @@ -94,15 +101,15 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns) struct timespec timespec_add(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 + ns2); } struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 - ns2); } @@ -116,7 +123,7 @@ struct timespec timespec_elapsed(struct timespec start) struct timespec timespec_div(struct timespec ts, int divisor) { - int64_t ns = timespec_to_ns(ts) / divisor; + s64 ns = timespec_to_ns(ts) / divisor; return timespec_add_ns((struct timespec){0}, ns); } @@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret; - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path); return ret == 0; } -size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret; - TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path); ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. */ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path); + fclose(f); return size; } +size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + +bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64]; @@ -198,7 +225,7 @@ size_t get_def_hugetlb_pagesz(void) #define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) #define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) -const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i) { static const struct vm_mem_backing_src_alias aliases[] = { [VM_MEM_SRC_ANONYMOUS] = { @@ -290,9 +317,9 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) #define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)) -size_t get_backing_src_pagesz(uint32_t i) +size_t get_backing_src_pagesz(u32 i) { - uint32_t flag = vm_mem_backing_src_alias(i)->flag; + u32 flag = vm_mem_backing_src_alias(i)->flag; switch (i) { case VM_MEM_SRC_ANONYMOUS: @@ -308,7 +335,7 @@ size_t get_backing_src_pagesz(uint32_t i) } } -bool is_backing_src_hugetlb(uint32_t i) +bool is_backing_src_hugetlb(u32 i) { return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); } diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 42151e571953..029ce21f9f2f 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -14,7 +14,7 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; -int ucall_nr_pages_required(uint64_t page_size) +int ucall_nr_pages_required(u64 page_size) { return align_up(sizeof(struct ucall_header), page_size) / page_size; } @@ -25,16 +25,16 @@ int ucall_nr_pages_required(uint64_t page_size) */ static struct ucall_header *ucall_pool; -void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) { struct ucall_header *hdr; struct ucall *uc; - vm_vaddr_t vaddr; + gva_t gva; int i; - vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, - MEM_REGION_DATA); - hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + gva = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + MEM_REGION_DATA); + hdr = (struct ucall_header *)addr_gva2hva(vm, gva); memset(hdr, 0, sizeof(*hdr)); for (i = 0; i < KVM_MAX_VCPUS; ++i) { @@ -42,7 +42,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) uc->hva = uc; } - write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + write_guest_global(vm, ucall_pool, (struct ucall_header *)gva); ucall_arch_init(vm, mmio_gpa); } @@ -79,7 +79,7 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } -void ucall_assert(uint64_t cmd, const char *exp, const char *file, +void ucall_assert(u64 cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...) { struct ucall *uc; @@ -88,20 +88,20 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file, uc = ucall_alloc(); uc->cmd = cmd; - WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); - WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (u64)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (u64)(file)); WRITE_ONCE(uc->args[GUEST_LINE], line); va_start(va, fmt); guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -void ucall_fmt(uint64_t cmd, const char *fmt, ...) +void ucall_fmt(u64 cmd, const char *fmt, ...) { struct ucall *uc; va_list va; @@ -113,12 +113,12 @@ void ucall_fmt(uint64_t cmd, const char *fmt, ...) guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -void ucall(uint64_t cmd, int nargs, ...) +void ucall(u64 cmd, int nargs, ...) { struct ucall *uc; va_list va; @@ -132,15 +132,15 @@ void ucall(uint64_t cmd, int nargs, ...) va_start(va, nargs); for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + WRITE_ONCE(uc->args[i], va_arg(va, u64)); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct ucall ucall; void *addr; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 7c9de8414462..ef8d76f71f83 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -27,7 +27,7 @@ static void *uffd_handler_thread_fn(void *arg) { struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; int uffd = reader_args->uffd; - int64_t pages = 0; + s64 pages = 0; struct timespec start; struct timespec ts_diff; struct epoll_event evt; @@ -100,8 +100,8 @@ static void *uffd_handler_thread_fn(void *arg) } struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, - void *hva, uint64_t len, - uint64_t num_readers, + void *hva, u64 len, + u64 num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -109,12 +109,12 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, int uffd; struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + u64 expected_ioctls = ((u64)1) << _UFFDIO_COPY; int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); @@ -132,7 +132,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, /* In order to get minor faults, prefault via the alias. */ if (is_minor) - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + expected_ioctls = ((u64)1) << _UFFDIO_CONTINUE; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); @@ -141,9 +141,9 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, uffdio_api.features = 0; TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, "ioctl UFFDIO_API failed: %" PRIu64, - (uint64_t)uffdio_api.api); + (u64)uffdio_api.api); - uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.start = (u64)hva; uffdio_register.range.len = len; uffdio_register.mode = uffd_mode; TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..5182fd0d6a76 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c @@ -14,7 +14,7 @@ void apic_disable(void) void xapic_enable(void) { - uint64_t val = rdmsr(MSR_IA32_APICBASE); + u64 val = rdmsr(MSR_IA32_APICBASE); /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ if (val & MSR_IA32_APICBASE_EXTD) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S index 7629819734af..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index 15bc8cd583aa..d200c5c26e2e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -76,23 +76,23 @@ bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) } struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) + gva_t *p_hv_pages_gva) { - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + gva_t hv_pages_gva = vm_alloc_page(vm); struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist = (void *)vm_alloc_page(vm); hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist = (void *)vm_alloc_page(vm); hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs = (void *)vm_alloc_page(vm); hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); @@ -100,9 +100,9 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, return hv; } -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +int enable_vp_assist(u64 vp_assist_pa, void *vp_assist) { - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index d61e623afc8c..61cf952cd2dc 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to memstress.c. + * x86-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. */ @@ -13,9 +13,10 @@ #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "svm_util.h" #include "vmx.h" -void memstress_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(u64 vcpu_id) { memstress_guest_code(vcpu_id); vmcall(); @@ -29,9 +30,10 @@ __asm__( " ud2;" ); -static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) -{ #define L2_GUEST_STACK_SIZE 64 + +static void l1_vmx_code(struct vmx_pages *vmx, u64 vcpu_id) +{ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; unsigned long *rsp; @@ -45,11 +47,35 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t memstress_nested_pages(int nr_vcpus) +static void l1_svm_code(struct svm_test_data *svm, u64 vcpu_id) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + unsigned long *rsp; + + + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; + *rsp = vcpu_id; + generic_svm_setup(svm, memstress_l2_guest_entry, rsp); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + + +static void memstress_l1_guest_code(void *data, u64 vcpu_id) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data, vcpu_id); + else + l1_svm_code(data, vcpu_id); +} + +u64 memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +static void memstress_setup_ept_mappings(struct kvm_vm *vm) { - uint64_t start, end; - - prepare_eptp(vmx, vm, 0); + u64 start, end; /* * Identity map the first 4G and the test region with 1G pages so that * KVM can shadow the EPT12 with the maximum huge page size supported * by the backing source. */ - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + tdp_identity_map_1g(vm, 0, 0x100000000ULL); start = align_down(memstress_args.gpa, PG_SIZE_1G); end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); - nested_identity_map_1g(vmx, vm, start, end - start); + tdp_identity_map_1g(vm, start, end - start); } void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { - struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; - vm_vaddr_t vmx_gva; + gva_t nested_gva; int vcpu_id; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); + TEST_REQUIRE(kvm_cpu_has_tdp()); + vm_enable_tdp(vm); + memstress_setup_ept_mappings(vm); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vmx = vcpu_alloc_vmx(vm, &vmx_gva); - - if (vcpu_id == 0) { - memstress_setup_ept(vmx, vm); - vmx0 = vmx; - } else { - /* Share the same EPT table across all vCPUs. */ - vmx->eptp = vmx0->eptp; - vmx->eptp_hva = vmx0->eptp_hva; - vmx->eptp_gpa = vmx0->eptp_gpa; - } + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); /* * Override the vCPU to run memstress_l1_guest_code() which will @@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc vcpu_regs_get(vcpus[vcpu_id], ®s); regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], ®s); - vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); + vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id); } } diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c new file mode 100644 index 000000000000..0851b74b4e46 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Tencent, Inc. + */ + +#include <stdint.h> + +#include <linux/kernel.h> + +#include "kvm_util.h" +#include "processor.h" +#include "pmu.h" + +const u64 intel_pmu_arch_events[] = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, +}; +kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); + +const u64 amd_pmu_zen_events[] = { + AMD_ZEN_CORE_CYCLES, + AMD_ZEN_INSTRUCTIONS_RETIRED, + AMD_ZEN_BRANCHES_RETIRED, + AMD_ZEN_BRANCHES_MISPREDICTED, +}; +kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. + * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static u64 get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +u64 pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 636b29ba8985..b51467d70f6e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2018, Google LLC. */ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" +#include "smm.h" +#include "svm_util.h" #include "sev.h" +#include "vmx.h" #ifndef NUM_INTERRUPTS #define NUM_INTERRUPTS 256 @@ -19,13 +21,48 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -vm_vaddr_t exception_handlers; +gva_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; +bool host_cpu_is_hygon; +bool host_cpu_is_amd_compatible; bool is_forced_emulation_enabled; -uint64_t guest_tsc_khz; +u64 guest_tsc_khz; + +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} -static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) +static void regs_dump(FILE *stream, struct kvm_regs *regs, u8 indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " "rcx: 0x%.16llx rdx: 0x%.16llx\n", @@ -49,7 +86,7 @@ static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) } static void segment_dump(FILE *stream, struct kvm_segment *segment, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " "selector: 0x%.4x type: 0x%.2x\n", @@ -66,7 +103,7 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment, } static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " "padding: 0x%.4x 0x%.4x 0x%.4x\n", @@ -74,7 +111,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, dtable->padding[0], dtable->padding[1], dtable->padding[2]); } -static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) +static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, u8 indent) { unsigned int i; @@ -124,47 +161,83 @@ bool kvm_is_tdp_enabled(void) return get_kvm_amd_param_bool("npt"); } +static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu, + struct pte_masks *pte_masks) +{ + /* If needed, create the top-level page table. */ + if (!mmu->pgd_created) { + mmu->pgd = vm_alloc_page_table(vm); + mmu->pgd_created = true; + mmu->arch.pte_masks = *pte_masks; + } + + TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5, + "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging", + mmu->pgtable_levels); +} + void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); + + struct pte_masks pte_masks = (struct pte_masks){ + .present = BIT_ULL(0), + .writable = BIT_ULL(1), + .user = BIT_ULL(2), + .accessed = BIT_ULL(5), + .dirty = BIT_ULL(6), + .huge = BIT_ULL(7), + .nx = BIT_ULL(63), + .executable = 0, + .c = vm->arch.c_bit, + .s = vm->arch.s_bit, + }; + + virt_mmu_init(vm, &vm->mmu, &pte_masks); +} + +void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, + struct pte_masks *pte_masks) +{ + TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized"); - /* If needed, create page map l4 table. */ - if (!vm->pgd_created) { - vm->pgd = vm_alloc_page_table(vm); - vm->pgd_created = true; - } + vm->stage2_mmu.pgtable_levels = pgtable_levels; + virt_mmu_init(vm, &vm->stage2_mmu, pte_masks); } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level) +static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, + u64 *parent_pte, gva_t gva, int level) { - uint64_t pt_gpa = PTE_GET_PA(*parent_pte); - uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); - int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + u64 pt_gpa = PTE_GET_PA(*parent_pte); + u64 *page_table = addr_gpa2hva(vm, pt_gpa); + int index = (gva >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", - level + 1, vaddr); + level + 1, gva); return &page_table[index]; } -static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t *parent_pte, - uint64_t vaddr, - uint64_t paddr, - int current_level, - int target_level) +static u64 *virt_create_upper_pte(struct kvm_vm *vm, + struct kvm_mmu *mmu, + u64 *parent_pte, + gva_t gva, + gpa_t gpa, + int current_level, + int target_level) { - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + u64 *pte = virt_get_pte(vm, mmu, parent_pte, gva, current_level); - paddr = vm_untag_gpa(vm, paddr); + gpa = vm_untag_gpa(vm, gpa); - if (!(*pte & PTE_PRESENT_MASK)) { - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (!is_present_pte(mmu, pte)) { + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu); if (current_level == target_level) - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte |= PTE_HUGE_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { @@ -174,81 +247,84 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx", - current_level, vaddr); - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx", - current_level, vaddr); + "Cannot create hugepage at level: %u, gva: 0x%lx", + current_level, gva); + TEST_ASSERT(!is_huge_pte(mmu, pte), + "Cannot create page table at level: %u, gva: 0x%lx", + current_level, gva); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, + gpa_t gpa, int level) { - const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + const u64 pg_size = PG_LEVEL_SIZE(level); + u64 *pte = &mmu->pgd; + int current_level; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); - TEST_ASSERT((vaddr % pg_size) == 0, + TEST_ASSERT((gva % pg_size) == 0, "Virtual address not aligned,\n" - "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % pg_size) == 0, + "gva: 0x%lx page size: 0x%lx", gva, pg_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % pg_size) == 0, "Physical address not aligned,\n" - " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx page size: 0x%lx", gpa, pg_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, - "Unexpected bits in paddr: %lx", paddr); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + TEST_ASSERT(vm_untag_gpa(vm, gpa) == gpa, + "Unexpected bits in gpa: %lx", gpa); + + TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu), + "X and NX bit masks cannot be used simultaneously"); /* * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = mmu->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_create_upper_pte(vm, mmu, pte, gva, gpa, + current_level, level); + if (is_huge_pte(mmu, pte)) + return; + } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + pte = virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); + TEST_ASSERT(!is_present_pte(mmu, pte), + "PTE already present for 4k page at gva: 0x%lx", gva); + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final * leaf PTE needs manually set the C/S-bit. */ - if (vm_is_gpa_protected(vm, paddr)) - *pte |= vm->arch.c_bit; + if (vm_is_gpa_protected(vm, gpa)) + *pte |= PTE_C_BIT_MASK(mmu); else - *pte |= vm->arch.s_bit; + *pte |= PTE_S_BIT_MASK(mmu); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, &vm->mmu, gva, gpa, PG_LEVEL_4K); } -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level) +void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa, + u64 nr_bytes, int level) { - uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t nr_pages = nr_bytes / pg_size; + u64 pg_size = PG_LEVEL_SIZE(level); + u64 nr_pages = nr_bytes / pg_size; int i; TEST_ASSERT(nr_bytes % pg_size == 0, @@ -256,16 +332,19 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level); + __virt_pg_map(vm, &vm->mmu, gva, gpa, level); + sparsebit_set_num(vm->vpages_mapped, gva >> vm->page_shift, + nr_bytes / PAGE_SIZE); - vaddr += pg_size; - paddr += pg_size; + gva += pg_size; + gpa += pg_size; } } -static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +static bool vm_is_target_pte(struct kvm_mmu *mmu, u64 *pte, + int *level, int current_level) { - if (*pte & PTE_LARGE_MASK) { + if (is_huge_pte(mmu, pte)) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, "Unexpected hugepage at level %d", current_level); @@ -275,60 +354,65 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) return *level == current_level; } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level) +static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, + struct kvm_mmu *mmu, + gva_t gva, + int *level) { - uint64_t *pml4e, *pdpe, *pde; + int va_width = 12 + (mmu->pgtable_levels) * 9; + u64 *pte = &mmu->pgd; + int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - vaddr); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), + * Check that the gva is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. The virtual address is invalid."); - - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; + TEST_ASSERT(gva == (((s64)gva << (64 - va_width) >> (64 - va_width))), + "Canonical check failed. The virtual address is invalid."); + + for (current_level = mmu->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_get_pte(vm, mmu, pte, gva, current_level); + if (vm_is_target_pte(mmu, pte, level, current_level)) + return pte; + } - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; + return virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); +} - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; +u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa) +{ + int level = PG_LEVEL_4K; - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); } -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_4K; - return __vm_get_page_table_entry(vm, vaddr, &level); + return __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - uint64_t *pml4e, *pml4e_start; - uint64_t *pdpe, *pdpe_start; - uint64_t *pde, *pde_start; - uint64_t *pte, *pte_start; + struct kvm_mmu *mmu = &vm->mmu; + u64 *pml4e, *pml4e_start; + u64 *pdpe, *pdpe_start; + u64 *pde, *pde_start; + u64 *pte, *pte_start; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; fprintf(stream, "%*s " @@ -336,47 +420,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); - for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e_start = (u64 *)addr_gpa2hva(vm, mmu->pgd); + for (u16 n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; - if (!(*pml4e & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pml4e)) continue; fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " " %u\n", indent, "", pml4e - pml4e_start, pml4e, addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); + is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e)); pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); - for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + for (u16 n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; - if (!(*pdpe & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pdpe)) continue; fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " "%u %u\n", indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), - !!(*pdpe & PTE_NX_MASK)); + PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe), + is_nx_pte(mmu, pdpe)); pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); - for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + for (u16 n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; - if (!(*pde & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pde)) continue; fprintf(stream, "%*spde 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), - !!(*pde & PTE_NX_MASK)); + PTE_GET_PFN(*pde), is_writable_pte(mmu, pde), + is_nx_pte(mmu, pde)); pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); - for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + for (u16 n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; - if (!(*pte & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pte)) continue; fprintf(stream, "%*spte 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u " @@ -385,19 +469,83 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte - pte_start, pte, addr_hva2gpa(vm, pte), PTE_GET_PFN(*pte), - !!(*pte & PTE_WRITABLE_MASK), - !!(*pte & PTE_NX_MASK), - !!(*pte & PTE_DIRTY_MASK), - ((uint64_t) n1 << 27) - | ((uint64_t) n2 << 18) - | ((uint64_t) n3 << 9) - | ((uint64_t) n4)); + is_writable_pte(mmu, pte), + is_nx_pte(mmu, pte), + is_dirty_pte(mmu, pte), + ((u64)n1 << 27) + | ((u64)n2 << 18) + | ((u64)n3 << 9) + | ((u64)n4)); } } } } } +void vm_enable_tdp(struct kvm_vm *vm) +{ + if (kvm_cpu_has(X86_FEATURE_VMX)) + vm_enable_ept(vm); + else + vm_enable_npt(vm); +} + +bool kvm_cpu_has_tdp(void) +{ + return kvm_cpu_has_ept() || kvm_cpu_has_npt(); +} + +void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size, int level) +{ + size_t page_size = PG_LEVEL_SIZE(level); + size_t npages = size / page_size; + + TEST_ASSERT(l2_gpa + size > l2_gpa, "L2 GPA overflow"); + TEST_ASSERT(gpa + size > gpa, "GPA overflow"); + + while (npages--) { + __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, gpa, level); + l2_gpa += page_size; + gpa += page_size; + } +} + +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size) +{ + __tdp_map(vm, l2_gpa, gpa, size, PG_LEVEL_4K); +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void tdp_identity_map_default_memslots(struct kvm_vm *vm) +{ + u32 s, memslot = 0; + sparsebit_idx_t i, last; + struct userspace_mem_region *region = memslot2region(vm, memslot); + + /* Only memslot 0 is mapped here, ensure it's the only one being used */ + for (s = 0; s < NR_MEM_REGIONS; s++) + TEST_ASSERT_EQ(vm->memslots[s], 0); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + tdp_map(vm, (u64)i << vm->page_shift, + (u64)i << vm->page_shift, 1 << vm->page_shift); + } +} + +/* Identity map a region with 1GiB Pages. */ +void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size) +{ + __tdp_map(vm, addr, addr, size, PG_LEVEL_1G); +} + /* * Set Unusable Segment * @@ -465,12 +613,12 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) segp->present = true; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); + u64 *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); - TEST_ASSERT(*pte & PTE_PRESENT_MASK, + TEST_ASSERT(is_present_pte(&vm->mmu, pte), "Leaf PTE not PRESENT for gva: 0x%08lx", gva); /* @@ -480,7 +628,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) +static void kvm_seg_set_tss_64bit(gva_t base, struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); segp->base = base; @@ -494,7 +642,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; - TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); @@ -508,6 +657,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; if (kvm_cpu_has(X86_FEATURE_XSAVE)) sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->mmu.pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -517,7 +668,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) kvm_seg_set_kernel_data_64bit(&sregs.gs); kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); - sregs.cr3 = vm->pgd; + sregs.cr3 = vm->mmu.pgd; vcpu_sregs_set(vcpu, &sregs); } @@ -559,7 +710,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return false; if (regs->vector == DE_VECTOR) - return false; + regs->vector = KVM_MAGIC_DE_VECTOR; regs->rip = regs->r11; regs->r9 = regs->vector; @@ -590,16 +741,16 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) struct kvm_segment seg; int i; - vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.gdt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; kvm_seg_set_kernel_code_64bit(&seg); kvm_seg_fill_gdt_64bit(vm, &seg); @@ -614,9 +765,9 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)) { - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + gva_t *handlers = (gva_t *)addr_gva2hva(vm, vm->handlers); - handlers[vector] = (vm_vaddr_t)handler; + handlers[vector] = (gva_t)handler; } void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) @@ -627,7 +778,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) { int r; @@ -639,9 +790,12 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, host_cpu_is_hygon); + sync_global_to_guest(vm, host_cpu_is_amd_compatible); sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); @@ -662,18 +816,17 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_regs_set(vcpu, ®s); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; - vm_vaddr_t stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu; - stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); - stack_vaddr += DEFAULT_STACK_PGS * getpagesize(); + stack_gva += DEFAULT_STACK_PGS * getpagesize(); /* * Align stack to match calling sequence requirements in section "The @@ -684,9 +837,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * If this code is ever used to launch a vCPU with 32-bit entry point it * may need to subtract 4 bytes instead of 8 bytes. */ - TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), - "__vm_vaddr_alloc() did not provide a page-aligned address"); - stack_vaddr -= 8; + TEST_ASSERT(IS_ALIGNED(stack_gva, PAGE_SIZE), + "__vm_alloc() did not provide a page-aligned address"); + stack_gva -= 8; vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -696,7 +849,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; - regs.rsp = stack_vaddr; + regs.rsp = stack_gva; vcpu_regs_set(vcpu, ®s); /* Setup the MP state */ @@ -713,7 +866,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); @@ -748,9 +901,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return kvm_supported_cpuid; } -static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) +static u32 __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + u32 function, u32 index, + u8 reg, u8 lo, u8 hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -777,14 +930,14 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, feature.reg, feature.bit, feature.bit); } -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property) +u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) { return __kvm_cpu_has(cpuid, property.function, property.index, property.reg, property.lo_bit, property.hi_bit); } -uint64_t kvm_get_feature_msr(uint64_t msr_index) +u64 kvm_get_feature_msr(u64 msr_index) { struct { struct kvm_msrs header; @@ -803,7 +956,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } -void __vm_xsave_require_permission(uint64_t xfeature, const char *name) +void __vm_xsave_require_permission(u64 xfeature, const char *name) { int kvm_fd; u64 bitmask; @@ -860,7 +1013,7 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, - uint32_t value) + u32 value) { struct kvm_cpuid_entry2 *entry; @@ -875,7 +1028,7 @@ void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); } -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function) { struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); @@ -904,7 +1057,7 @@ void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_cpuid(vcpu); } -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) +u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index) { struct { struct kvm_msrs header; @@ -919,7 +1072,7 @@ uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) return buffer.entry.data; } -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) +int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value) { struct { struct kvm_msrs header; @@ -947,28 +1100,28 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); + regs.rdi = va_arg(ap, u64); if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); + regs.rsi = va_arg(ap, u64); if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); + regs.rdx = va_arg(ap, u64); if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); + regs.rcx = va_arg(ap, u64); if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); + regs.r8 = va_arg(ap, u64); if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); + regs.r9 = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_regs regs; struct kvm_sregs sregs; @@ -1037,7 +1190,7 @@ const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) return list; } -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) +bool kvm_msr_is_in_save_restore_list(u32 msr_index) { const struct kvm_msr_list *list = kvm_get_msr_index_list(); int i; @@ -1158,7 +1311,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; @@ -1168,7 +1321,7 @@ void kvm_init_vm_address_properties(struct kvm_vm *vm) } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index) + u32 function, u32 index) { int i; @@ -1185,7 +1338,7 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, #define X86_HYPERCALL(inputs...) \ ({ \ - uint64_t r; \ + u64 r; \ \ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \ "jnz 1f\n\t" \ @@ -1194,23 +1347,23 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, "1: vmmcall\n\t" \ "2:" \ : "=a"(r) \ - : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \ + : [use_vmmcall] "r" (host_cpu_is_amd_compatible), \ + inputs); \ \ r; \ }) -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3) +u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3) { return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); } -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +u64 __xen_hypercall(u64 nr, u64 a0, void *a1) { return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1)); } -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +void xen_hypercall(u64 nr, u64 a0, void *a1) { GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } @@ -1219,7 +1372,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr, guest_maxphyaddr; + u8 maxphyaddr, guest_maxphyaddr; /* * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR @@ -1234,8 +1387,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; - /* Avoid reserved HyperTransport region on AMD processors. */ - if (!host_cpu_is_amd) + /* Avoid reserved HyperTransport region on AMD or Hygon processors. */ + if (!host_cpu_is_amd_compatible) return max_gfn; /* On parts with <40 physical address bits, the area is fully hidden */ @@ -1249,7 +1402,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* * Otherwise it's at the top of the physical address space, possibly - * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * reduced due to SME or CSV by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. */ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) @@ -1266,21 +1419,15 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); + host_cpu_is_hygon = this_cpu_is_hygon(); + host_cpu_is_amd_compatible = host_cpu_is_amd || host_cpu_is_hygon; is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); + + kvm_init_pmu_errata(); } bool sys_clocksource_is_based_on_tsc(void) @@ -1293,3 +1440,32 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa, + const void *smi_handler, size_t handler_size) +{ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa, + SMRAM_MEMSLOT) == smram_gpa, + "Could not allocate guest physical addresses for SMRAM"); + + memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size); + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa); +} + +void inject_smi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + vcpu_events_set(vcpu, &events); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..93f916903461 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -14,35 +14,46 @@ * and find the first range, but that's correct because the condition * expression would cause us to quit the loop. */ -static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, + u8 page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; - const vm_paddr_t gpa_base = region->region.guest_phys_addr; + const gpa_t gpa_base = region->region.guest_phys_addr; const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; sparsebit_idx_t i, j; if (!sparsebit_any_set(protected_phy_pages)) return; - sev_register_encrypted_memory(vm, region); + if (!is_sev_snp_vm(vm)) + sev_register_encrypted_memory(vm, region); sparsebit_for_each_set_range(protected_phy_pages, i, j) { - const uint64_t size = (j - i + 1) * vm->page_size; - const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; + const u64 size = (j - i + 1) * vm->page_size; + const u64 offset = (i - lowest_page_in_region) * vm->page_size; + + if (private) + vm_mem_set_private(vm, gpa_base + offset, size); + + if (is_sev_snp_vm(vm)) + snp_launch_update_data(vm, gpa_base + offset, + (u64)addr_gpa2hva(vm, gpa_base + offset), + size, page_type); + else + sev_launch_update_data(vm, gpa_base + offset, size); - sev_launch_update_data(vm, gpa_base + offset, size); } } void sev_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } @@ -50,17 +61,25 @@ void sev_vm_init(struct kvm_vm *vm) void sev_es_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_ES_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) +void snp_vm_init(struct kvm_vm *vm) +{ + struct kvm_sev_init init = { 0 }; + + TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); +} + +void sev_vm_launch(struct kvm_vm *vm, u32 policy) { struct kvm_sev_launch_start launch_start = { .policy = policy, @@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) - encrypt_region(vm, region); + encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false); if (policy & SEV_POLICY_ES) vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); @@ -84,7 +103,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) vm->arch.is_pt_protected = true; } -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) +void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement) { struct kvm_sev_launch_measure launch_measure; struct kvm_sev_guest_status guest_status; @@ -112,7 +131,34 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, +void snp_vm_launch_start(struct kvm_vm *vm, u64 policy) +{ + struct kvm_sev_snp_launch_start launch_start = { + .policy = policy, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start); +} + +void snp_vm_launch_update(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + int ctr; + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true); + + vm->arch.is_pt_protected = true; +} + +void snp_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_snp_launch_finish launch_finish = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); +} + +struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { @@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement) { + if (is_sev_snp_vm(vm)) { + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); + + snp_vm_launch_start(vm, policy); + + snp_vm_launch_update(vm); + + snp_vm_launch_finish(vm); + + return; + } + sev_vm_launch(vm, policy); if (!measurement) diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 5495a92dfd5a..3b01605ab016 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/svm.c * Helpers used for nested SVM testing * Largely inspired from KVM unit test svm.c * @@ -29,24 +28,27 @@ u64 rflags; * Pointer to structure with the addresses of the SVM areas. */ struct svm_test_data * -vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva) { - vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); + gva_t svm_gva = vm_alloc_page(vm); struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); - svm->vmcb = (void *)vm_vaddr_alloc_page(vm); + svm->vmcb = (void *)vm_alloc_page(vm); svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); - svm->save_area = (void *)vm_vaddr_alloc_page(vm); + svm->save_area = (void *)vm_alloc_page(vm); svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); - svm->msr = (void *)vm_vaddr_alloc_page(vm); + svm->msr = (void *)vm_alloc_page(vm); svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); memset(svm->msr_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + svm->ncr3_gpa = vm->stage2_mmu.pgd; + *p_svm_gva = svm_gva; return svm; } @@ -60,17 +62,36 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } +void vm_enable_npt(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT"); + + /* + * NPTs use the same PTE format, but deliberately drop the C-bit as the + * per-VM shared vs. private information is only meant for stage-1. + */ + pte_masks = vm->mmu.arch.pte_masks; + pte_masks.c = 0; + + /* NPT walks are treated as user accesses, so set the 'user' bit. */ + pte_masks.always_set = pte_masks.user; + + tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks); +} + void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; - uint64_t vmcb_gpa = svm->vmcb_gpa; + u64 vmcb_gpa = svm->vmcb_gpa; struct vmcb_save_area *save = &vmcb->save; struct vmcb_control_area *ctrl = &vmcb->control; u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; - uint64_t efer; + u64 efer; efer = rdmsr(MSR_EFER); wrmsr(MSR_EFER, efer | EFER_SVME); @@ -103,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r vmcb->save.rip = (u64)guest_rip; vmcb->save.rsp = (u64)guest_rsp; guest_regs.rdi = (u64)svm; + + if (svm->ncr3_gpa) { + ctrl->misc_ctl |= SVM_MISC_ENABLE_NP; + ctrl->nested_cr3 = svm->ncr3_gpa; + } } /* @@ -132,7 +158,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r * for now. registers involved in LOAD/SAVE_GPR_C are eventually * unmodified so they do not need to be in the clobber list. */ -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +void run_guest(struct vmcb *vmcb, u64 vmcb_gpa) { asm volatile ( "vmload %[vmcb_gpa]\n\t" diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..e7dd5791959b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -6,9 +6,9 @@ */ #include "kvm_util.h" -#define UCALL_PIO_PORT ((uint16_t)0x1000) +#define UCALL_PIO_PORT ((u16)0x1000) -void ucall_arch_do_ucall(vm_vaddr_t uc) +void ucall_arch_do_ucall(gva_t uc) { /* * FIXME: Revert this hack (the entire commit that added it) once nVMX diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d7ac122820bf..67642759e4a0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * * Copyright (C) 2018, Google LLC. */ @@ -12,41 +10,24 @@ #include "processor.h" #include "vmx.h" -#define PAGE_SHIFT_4K 12 - #define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 +#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */ +#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */ +#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */ + +#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT) +#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */ +#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT) + bool enable_evmcs; struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; -struct eptPageTableEntry { - uint64_t readable:1; - uint64_t writable:1; - uint64_t executable:1; - uint64_t memory_type:3; - uint64_t ignore_pat:1; - uint64_t page_size:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t ignored_11_10:2; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t suppress_ve:1; -}; - -struct eptPageTablePointer { - uint64_t memory_type:3; - uint64_t page_walk_length:3; - uint64_t ad_enabled:1; - uint64_t reserved_11_07:5; - uint64_t address:40; - uint64_t reserved_63_52:12; -}; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { - uint16_t evmcs_ver; + u16 evmcs_ver; vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, (unsigned long)&evmcs_ver); @@ -60,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) return evmcs_ver; } +void vm_enable_ept(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); + + /* + * EPTs do not have 'present' or 'user' bits, instead bit 0 is the + * 'readable' bit. + */ + pte_masks = (struct pte_masks) { + .present = 0, + .user = 0, + .readable = BIT_ULL(0), + .writable = BIT_ULL(1), + .executable = BIT_ULL(2), + .huge = BIT_ULL(7), + .accessed = BIT_ULL(8), + .dirty = BIT_ULL(9), + .nx = 0, + }; + + /* TODO: Add support for 5-level EPT. */ + tdp_mmu_init(vm, 4, &pte_masks); +} + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -72,51 +79,54 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) * Pointer to structure with the addresses of the VMX areas. */ struct vmx_pages * -vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva) { - vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); + gva_t vmx_gva = vm_alloc_page(vm); struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); + vmx->vmxon = (void *)vm_alloc_page(vm); vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->vmcs = (void *)vm_alloc_page(vm); vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc_page(vm); + vmx->msr = (void *)vm_alloc_page(vm); vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); memset(vmx->msr_hva, 0, getpagesize()); /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->shadow_vmcs = (void *)vm_alloc_page(vm); vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ - vmx->vmread = (void *)vm_vaddr_alloc_page(vm); + vmx->vmread = (void *)vm_alloc_page(vm); vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); memset(vmx->vmread_hva, 0, getpagesize()); - vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); + vmx->vmwrite = (void *)vm_alloc_page(vm); vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + vmx->eptp_gpa = vm->stage2_mmu.pgd; + *p_vmx_gva = vmx_gva; return vmx; } bool prepare_for_vmx_operation(struct vmx_pages *vmx) { - uint64_t feature_control; - uint64_t required; + u64 feature_control; + u64 required; unsigned long cr0; unsigned long cr4; @@ -150,7 +160,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); /* Enter VMX root operation. */ - *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + *(u32 *)(vmx->vmxon) = vmcs_revision(); if (vmxon(vmx->vmxon_gpa)) return false; @@ -160,7 +170,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + *(u32 *)(vmx->vmcs) = vmcs_revision(); if (vmclear(vmx->vmcs_gpa)) return false; @@ -168,14 +178,14 @@ bool load_vmcs(struct vmx_pages *vmx) return false; /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + *(u32 *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; if (vmclear(vmx->shadow_vmcs_gpa)) return false; return true; } -static bool ept_vpid_cap_supported(uint64_t mask) +static bool ept_vpid_cap_supported(u64 mask) { return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; } @@ -190,7 +200,7 @@ bool ept_1g_pages_supported(void) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { - uint32_t sec_exec_ctl = 0; + u32 sec_exec_ctl = 0; vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); @@ -198,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); if (vmx->eptp_gpa) { - uint64_t ept_paddr; - struct eptPageTablePointer eptp = { - .memory_type = X86_MEMTYPE_WB, - .page_walk_length = 3, /* + 1 */ - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, - }; - - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); - vmwrite(EPT_POINTER, ept_paddr); + u64 eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; + + TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, + "Illegal bits set in vmx->eptp_gpa"); + + if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS)) + eptp |= EPTP_AD_ENABLED; + + vmwrite(EPT_POINTER, eptp); sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; } @@ -250,7 +259,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) */ static inline void init_vmcs_host_state(void) { - uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + u32 exit_controls = vmreadz(VM_EXIT_CONTROLS); vmwrite(HOST_ES_SELECTOR, get_es()); vmwrite(HOST_CS_SELECTOR, get_cs()); @@ -349,8 +358,8 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp) vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); vmwrite(GUEST_DR7, 0x400); - vmwrite(GUEST_RSP, (uint64_t)rsp); - vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RSP, (u64)rsp); + vmwrite(GUEST_RIP, (u64)rip); vmwrite(GUEST_RFLAGS, 2); vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); @@ -364,169 +373,12 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -static void nested_create_pte(struct kvm_vm *vm, - struct eptPageTableEntry *pte, - uint64_t nested_paddr, - uint64_t paddr, - int current_level, - int target_level) -{ - if (!pte->readable) { - pte->writable = true; - pte->readable = true; - pte->executable = true; - pte->page_size = (current_level == target_level); - if (pte->page_size) - pte->address = paddr >> vm->page_shift; - else - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. - */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - } -} - - -void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, int target_level) -{ - const uint64_t page_size = PG_LEVEL_SIZE(target_level); - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; - uint16_t index; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", - nested_paddr); - TEST_ASSERT((nested_paddr % page_size) == 0, - "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx page_size: 0x%lx", - nested_paddr, page_size); - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT((paddr % page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx page_size: 0x%lx", - paddr, page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - pte = &pt[index]; - - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - - if (pte->page_size) - break; - - pt = addr_gpa2hva(vm, pte->address * vm->page_size); - } - - /* - * For now mark these as accessed and dirty because the only - * testcase we have needs that. Can be reconsidered later. - */ - pte->accessed = true; - pte->dirty = true; - -} - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) -{ - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); -} - -/* - * Map a range of EPT guest physical addresses to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * nested_paddr - Nested guest physical address to map - * paddr - VM Physical Address - * size - The size of the range to map - * level - The level at which to map the range - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a nested guest translation for the - * page range starting at nested_paddr to the page range starting at paddr. - */ -void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - int level) -{ - size_t page_size = PG_LEVEL_SIZE(level); - size_t npages = size / page_size; - - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); - - while (npages--) { - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); - nested_paddr += page_size; - paddr += page_size; - } -} - -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) -{ - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); -} - -/* Prepare an identity extended page table that maps all the - * physical pages in VM. - */ -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot) -{ - sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); - - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; - last = i + (region->region.memory_size >> vm->page_shift); - for (;;) { - i = sparsebit_next_clear(region->unused_phy_pages, i); - if (i > last) - break; - - nested_map(vmx, vm, - (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, - 1 << vm->page_shift); - } -} - -/* Identity map a region with 1GiB Pages. */ -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size) -{ - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); -} - bool kvm_cpu_has_ept(void) { - uint64_t ctrl; + u64 ctrl; + + if (!kvm_cpu_has(X86_FEATURE_VMX)) + return false; ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) @@ -536,19 +388,9 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) -{ - TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); - - vmx->eptp = (void *)vm_vaddr_alloc_page(vm); - vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); - vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); -} - void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) { - vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); + vmx->apic_access = (void *)vm_alloc_page(vm); vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c deleted file mode 100644 index f31f0427c17c..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2023, Tencent, Inc. - */ - -#include <stdint.h> - -#include <linux/kernel.h> - -#include "kvm_util.h" -#include "pmu.h" - -const uint64_t intel_pmu_arch_events[] = { - INTEL_ARCH_CPU_CYCLES, - INTEL_ARCH_INSTRUCTIONS_RETIRED, - INTEL_ARCH_REFERENCE_CYCLES, - INTEL_ARCH_LLC_REFERENCES, - INTEL_ARCH_LLC_MISSES, - INTEL_ARCH_BRANCHES_RETIRED, - INTEL_ARCH_BRANCHES_MISPREDICTED, - INTEL_ARCH_TOPDOWN_SLOTS, -}; -kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); - -const uint64_t amd_pmu_zen_events[] = { - AMD_ZEN_CORE_CYCLES, - AMD_ZEN_INSTRUCTIONS_RETIRED, - AMD_ZEN_BRANCHES_RETIRED, - AMD_ZEN_BRANCHES_MISPREDICTED, -}; -kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); |
