summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-04-13 11:49:54 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2026-04-13 11:49:54 +0200
commite74c3a8891c05f88eeb87121de7e12dc95766a4a (patch)
tree3016a634244192f7d3e520ddeaf38cfc9d0470ec /include
parent05578316ca7246b45fb7d9eaf81308c0e0f3ee10 (diff)
parent94b4ae79ebb42a8a6f2124b4d4b033b15a98e4f9 (diff)
downloadlwn-e74c3a8891c05f88eeb87121de7e12dc95766a4a.tar.gz
lwn-e74c3a8891c05f88eeb87121de7e12dc95766a4a.zip
Merge tag 'kvmarm-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 7.1 * New features: - Add support for tracing in the standalone EL2 hypervisor code, which should help both debugging and performance analysis. This comes with a full infrastructure for 'remote' trace buffers that can be exposed by non-kernel entities such as firmware. - Add support for GICv5 Per Processor Interrupts (PPIs), as the starting point for supporting the new GIC architecture in KVM. - Finally add support for pKVM protected guests, with anonymous memory being used as a backing store. About time! * Improvements and bug fixes: - Rework the dreaded user_mem_abort() function to make it more maintainable, reducing the amount of state being exposed to the various helpers and rendering a substantial amount of state immutable. - Expand the Stage-2 page table dumper to support NV shadow page tables on a per-VM basis. - Tidy up the pKVM PSCI proxy code to be slightly less hard to follow. - Fix both SPE and TRBE in non-VHE configurations so that they do not generate spurious, out of context table walks that ultimately lead to very bad HW lockups. - A small set of patches fixing the Stage-2 MMU freeing in error cases. - Tighten-up accepted SMC immediate value to be only #0 for host SMCCC calls. - The usual cleanups and other selftest churn.
Diffstat (limited to 'include')
-rw-r--r--include/kvm/arm_arch_timer.h8
-rw-r--r--include/kvm/arm_pmu.h5
-rw-r--r--include/kvm/arm_vgic.h191
-rw-r--r--include/linux/irqchip/arm-gic-v5.h27
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/ring_buffer.h58
-rw-r--r--include/linux/ring_buffer_types.h41
-rw-r--r--include/linux/simple_ring_buffer.h65
-rw-r--r--include/linux/trace_remote.h48
-rw-r--r--include/linux/trace_remote_event.h33
-rw-r--r--include/trace/define_remote_events.h73
-rw-r--r--include/uapi/linux/kvm.h7
-rw-r--r--include/uapi/linux/trace_mmap.h8
13 files changed, 553 insertions, 12 deletions
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 7310841f4512..bf8cc9589bd0 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -10,6 +10,8 @@
#include <linux/clocksource.h>
#include <linux/hrtimer.h>
+#include <linux/irqchip/arm-gic-v5.h>
+
enum kvm_arch_timers {
TIMER_PTIMER,
TIMER_VTIMER,
@@ -47,7 +49,7 @@ struct arch_timer_vm_data {
u64 poffset;
/* The PPI for each timer, global to the VM */
- u8 ppi[NR_KVM_TIMERS];
+ u32 ppi[NR_KVM_TIMERS];
};
struct arch_timer_context {
@@ -130,6 +132,10 @@ void kvm_timer_init_vhe(void);
#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
+#define get_vgic_ppi(k, i) (((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? \
+ (i) : (FIELD_PREP(GICV5_HWIRQ_ID, i) | \
+ FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI)))
+
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timers tmr,
enum kvm_arch_timer_regs treg);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 96754b51b411..0a36a3d5c894 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -12,6 +12,9 @@
#define KVM_ARMV8_PMU_MAX_COUNTERS 32
+/* PPI #23 - architecturally specified for GICv5 */
+#define KVM_ARMV8_PMU_GICV5_IRQ 0x20000017
+
#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
struct kvm_pmc {
u8 idx; /* index into the pmu->pmc array */
@@ -38,7 +41,7 @@ struct arm_pmu_entry {
};
bool kvm_supports_guest_pmuv3(void);
-#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
+#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num != 0)
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f2eafc65bbf4..1388dc6028a9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -19,7 +19,9 @@
#include <linux/jump_label.h>
#include <linux/irqchip/arm-gic-v4.h>
+#include <linux/irqchip/arm-gic-v5.h>
+#define VGIC_V5_MAX_CPUS 512
#define VGIC_V3_MAX_CPUS 512
#define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256
@@ -31,9 +33,96 @@
#define VGIC_MIN_LPI 8192
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
-#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
-#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
- (irq) <= VGIC_MAX_SPI)
+/*
+ * GICv5 supports 128 PPIs, but only the first 64 are architected. We only
+ * support the timers and PMU in KVM, both of which are architected. Rather than
+ * handling twice the state, we instead opt to only support the architected set
+ * in KVM for now. At a future stage, this can be bumped up to 128, if required.
+ */
+#define VGIC_V5_NR_PRIVATE_IRQS 64
+
+#define is_v5_type(t, i) (FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t))
+
+#define __irq_is_sgi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = false; \
+ break; \
+ default: \
+ __ret = (i) < VGIC_NR_SGIS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_ppi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_PPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) >= VGIC_NR_SGIS; \
+ __ret &= (i) < VGIC_NR_PRIVATE_IRQS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_spi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_SPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) <= VGIC_MAX_SPI; \
+ __ret &= (i) >= VGIC_NR_PRIVATE_IRQS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_lpi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_LPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) >= 8192; \
+ } \
+ \
+ __ret; \
+ })
+
+#define irq_is_sgi(k, i) __irq_is_sgi((k)->arch.vgic.vgic_model, i)
+#define irq_is_ppi(k, i) __irq_is_ppi((k)->arch.vgic.vgic_model, i)
+#define irq_is_spi(k, i) __irq_is_spi((k)->arch.vgic.vgic_model, i)
+#define irq_is_lpi(k, i) __irq_is_lpi((k)->arch.vgic.vgic_model, i)
+
+#define irq_is_private(k, i) (irq_is_ppi(k, i) || irq_is_sgi(k, i))
+
+#define vgic_v5_get_hwirq_id(x) FIELD_GET(GICV5_HWIRQ_ID, (x))
+#define vgic_v5_set_hwirq_id(x) FIELD_PREP(GICV5_HWIRQ_ID, (x))
+
+#define __vgic_v5_set_type(t) (FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_##t))
+#define vgic_v5_make_ppi(x) (__vgic_v5_set_type(PPI) | vgic_v5_set_hwirq_id(x))
+#define vgic_v5_make_spi(x) (__vgic_v5_set_type(SPI) | vgic_v5_set_hwirq_id(x))
+#define vgic_v5_make_lpi(x) (__vgic_v5_set_type(LPI) | vgic_v5_set_hwirq_id(x))
+
+#define __vgic_is_v(k, v) ((k)->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V##v)
+#define vgic_is_v3(k) (__vgic_is_v(k, 3))
+#define vgic_is_v5(k) (__vgic_is_v(k, 5))
enum vgic_type {
VGIC_V2, /* Good ol' GICv2 */
@@ -101,6 +190,8 @@ enum vgic_irq_config {
VGIC_CONFIG_LEVEL
};
+struct vgic_irq;
+
/*
* Per-irq ops overriding some common behavious.
*
@@ -119,6 +210,19 @@ struct irq_ops {
* peaking into the physical GIC.
*/
bool (*get_input_level)(int vintid);
+
+ /*
+ * Function pointer to override the queuing of an IRQ.
+ */
+ bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags) __releases(&irq->irq_lock);
+
+ /*
+ * Callback function pointer to either enable or disable direct
+ * injection for a mapped interrupt.
+ */
+ void (*set_direct_injection)(struct kvm_vcpu *vcpu,
+ struct vgic_irq *irq, bool direct);
};
struct vgic_irq {
@@ -238,6 +342,26 @@ struct vgic_redist_region {
struct list_head list;
};
+struct vgic_v5_vm {
+ /*
+ * We only expose a subset of PPIs to the guest. This subset is a
+ * combination of the PPIs that are actually implemented and what we
+ * actually choose to expose.
+ */
+ DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /* A mask of the PPIs that are exposed for userspace to drive. */
+ DECLARE_BITMAP(userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /*
+ * The HMR itself is handled by the hardware, but we still need to have
+ * a mask that we can use when merging in pending state (only the state
+ * of Edge PPIs is merged back in from the guest an the HMR provides a
+ * convenient way to do that).
+ */
+ DECLARE_BITMAP(vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
+};
+
struct vgic_dist {
bool in_kernel;
bool ready;
@@ -310,6 +434,11 @@ struct vgic_dist {
* else.
*/
struct its_vm its_vm;
+
+ /*
+ * GICv5 per-VM data.
+ */
+ struct vgic_v5_vm gicv5_vm;
};
struct vgic_v2_cpu_if {
@@ -340,11 +469,40 @@ struct vgic_v3_cpu_if {
unsigned int used_lrs;
};
+struct vgic_v5_cpu_if {
+ u64 vgic_apr;
+ u64 vgic_vmcr;
+
+ /* PPI register state */
+ DECLARE_BITMAP(vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP(vgic_ppi_activer, VGIC_V5_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP(vgic_ppi_enabler, VGIC_V5_NR_PRIVATE_IRQS);
+ /* We have one byte (of which 5 bits are used) per PPI for priority */
+ u64 vgic_ppi_priorityr[VGIC_V5_NR_PRIVATE_IRQS / 8];
+
+ /*
+ * The ICSR is re-used across host and guest, and hence it needs to be
+ * saved/restored. Only one copy is required as the host should block
+ * preemption between executing GIC CDRCFG and acccessing the
+ * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
+ * it is the hyp's responsibility to keep the state constistent.
+ */
+ u64 vgic_icsr;
+
+ struct gicv5_vpe gicv5_vpe;
+};
+
+/* What PPI capabilities does a GICv5 host have */
+struct vgic_v5_ppi_caps {
+ DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+};
+
struct vgic_cpu {
/* CPU vif control registers for world switch */
union {
struct vgic_v2_cpu_if vgic_v2;
struct vgic_v3_cpu_if vgic_v3;
+ struct vgic_v5_cpu_if vgic_v5;
};
struct vgic_irq *private_irqs;
@@ -392,13 +550,17 @@ int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
+void kvm_vgic_finalize_idregs(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
unsigned int intid, bool level, void *owner);
+void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
+ struct irq_ops *ops);
+void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, struct irq_ops *ops);
+ u32 vintid);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
@@ -414,8 +576,20 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.initialized)
-#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
- ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
+#define vgic_valid_spi(k, i) \
+ ({ \
+ bool __ret = irq_is_spi(k, i); \
+ \
+ switch ((k)->arch.vgic.vgic_model) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret &= FIELD_GET(GICV5_HWIRQ_ID, i) < (k)->arch.vgic.nr_spis; \
+ break; \
+ default: \
+ __ret &= (i) < ((k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); \
+ } \
+ \
+ __ret; \
+ })
bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
@@ -455,6 +629,11 @@ int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
int vgic_v4_put(struct kvm_vcpu *vcpu);
+int vgic_v5_finalize_ppi_state(struct kvm *kvm);
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags);
+void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi);
+
bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
/* CPU HP callbacks */
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
index b78488df6c98..40d2fce68294 100644
--- a/include/linux/irqchip/arm-gic-v5.h
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -25,6 +25,28 @@
#define GICV5_HWIRQ_TYPE_SPI UL(0x3)
/*
+ * Architected PPIs
+ */
+#define GICV5_ARCH_PPI_S_DB_PPI 0x0
+#define GICV5_ARCH_PPI_RL_DB_PPI 0x1
+#define GICV5_ARCH_PPI_NS_DB_PPI 0x2
+#define GICV5_ARCH_PPI_SW_PPI 0x3
+#define GICV5_ARCH_PPI_HACDBSIRQ 0xf
+#define GICV5_ARCH_PPI_CNTHVS 0x13
+#define GICV5_ARCH_PPI_CNTHPS 0x14
+#define GICV5_ARCH_PPI_PMBIRQ 0x15
+#define GICV5_ARCH_PPI_COMMIRQ 0x16
+#define GICV5_ARCH_PPI_PMUIRQ 0x17
+#define GICV5_ARCH_PPI_CTIIRQ 0x18
+#define GICV5_ARCH_PPI_GICMNT 0x19
+#define GICV5_ARCH_PPI_CNTHP 0x1a
+#define GICV5_ARCH_PPI_CNTV 0x1b
+#define GICV5_ARCH_PPI_CNTHV 0x1c
+#define GICV5_ARCH_PPI_CNTPS 0x1d
+#define GICV5_ARCH_PPI_CNTP 0x1e
+#define GICV5_ARCH_PPI_TRBIRQ 0x1f
+
+/*
* Tables attributes
*/
#define GICV5_NO_READ_ALLOC 0b0
@@ -365,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
int gicv5_irs_iste_alloc(u32 lpi);
void gicv5_irs_syncr(void);
+/* Embedded in kvm.arch */
+struct gicv5_vpe {
+ bool resident;
+};
+
struct gicv5_its_devtab_cfg {
union {
struct {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6b76e7a6f4c2..779d9ed85cbf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2366,6 +2366,7 @@ void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v5_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d862fa610270..994f52b34344 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -251,4 +251,62 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
+
+struct ring_buffer_desc {
+ int cpu;
+ unsigned int nr_page_va; /* excludes the meta page */
+ unsigned long meta_va;
+ unsigned long page_va[] __counted_by(nr_page_va);
+};
+
+struct trace_buffer_desc {
+ int nr_cpus;
+ size_t struct_len;
+ char __data[]; /* list of ring_buffer_desc */
+};
+
+static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
+{
+ size_t len = struct_size(desc, page_va, desc->nr_page_va);
+
+ return (struct ring_buffer_desc *)((void *)desc + len);
+}
+
+static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
+{
+ return (struct ring_buffer_desc *)(&desc->__data[0]);
+}
+
+static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
+{
+ unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+ struct ring_buffer_desc *rbdesc;
+
+ return size_add(offsetof(struct trace_buffer_desc, __data),
+ size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages)));
+}
+
+#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc) \
+ for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0; \
+ (__cpu) < (__trace_pdesc)->nr_cpus; \
+ (__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
+
+struct ring_buffer_remote {
+ struct trace_buffer_desc *desc;
+ int (*swap_reader_page)(unsigned int cpu, void *priv);
+ int (*reset)(unsigned int cpu, void *priv);
+ void *priv;
+};
+
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
+
+struct trace_buffer *
+__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+ struct lock_class_key *key);
+
+#define ring_buffer_alloc_remote(remote) \
+({ \
+ static struct lock_class_key __key; \
+ __ring_buffer_alloc_remote(remote, &__key); \
+})
#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/ring_buffer_types.h b/include/linux/ring_buffer_types.h
new file mode 100644
index 000000000000..54577021a49d
--- /dev/null
+++ b/include/linux/ring_buffer_types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RING_BUFFER_TYPES_H
+#define _LINUX_RING_BUFFER_TYPES_H
+
+#include <asm/local.h>
+
+#define TS_SHIFT 27
+#define TS_MASK ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST (~TS_MASK)
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline bool test_time_stamp(u64 delta)
+{
+ return !!(delta & TS_DELTA_TEST);
+}
+
+#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
+
+#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
+#define RB_ALIGNMENT 4U
+#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+
+struct buffer_data_page {
+ u64 time_stamp; /* page time stamp */
+ local_t commit; /* write committed index */
+ unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
+};
+#endif
diff --git a/include/linux/simple_ring_buffer.h b/include/linux/simple_ring_buffer.h
new file mode 100644
index 000000000000..21aec556293e
--- /dev/null
+++ b/include/linux/simple_ring_buffer.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SIMPLE_RING_BUFFER_H
+#define _LINUX_SIMPLE_RING_BUFFER_H
+
+#include <linux/list.h>
+#include <linux/ring_buffer.h>
+#include <linux/ring_buffer_types.h>
+#include <linux/types.h>
+
+/*
+ * Ideally those struct would stay private but the caller needs to know
+ * the allocation size for simple_ring_buffer_init().
+ */
+struct simple_buffer_page {
+ struct list_head link;
+ struct buffer_data_page *page;
+ u64 entries;
+ u32 write;
+ u32 id;
+};
+
+struct simple_rb_per_cpu {
+ struct simple_buffer_page *tail_page;
+ struct simple_buffer_page *reader_page;
+ struct simple_buffer_page *head_page;
+ struct simple_buffer_page *bpages;
+ struct trace_buffer_meta *meta;
+ u32 nr_pages;
+
+#define SIMPLE_RB_UNAVAILABLE 0
+#define SIMPLE_RB_READY 1
+#define SIMPLE_RB_WRITING 2
+ u32 status;
+
+ u64 last_overrun;
+ u64 write_stamp;
+
+ struct simple_rb_cbs *cbs;
+};
+
+int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
+ const struct ring_buffer_desc *desc);
+
+void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer);
+
+void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
+ u64 timestamp);
+
+void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable);
+
+int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
+ struct simple_buffer_page *bpages,
+ const struct ring_buffer_desc *desc,
+ void *(*load_page)(unsigned long va),
+ void (*unload_page)(void *va));
+
+void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
+ void (*unload_page)(void *));
+#endif
diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
new file mode 100644
index 000000000000..fcd1d46ea466
--- /dev/null
+++ b/include/linux/trace_remote.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_H
+#define _LINUX_TRACE_REMOTE_H
+
+#include <linux/dcache.h>
+#include <linux/ring_buffer.h>
+#include <linux/trace_remote_event.h>
+
+/**
+ * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
+ * @init: Called once the remote has been registered. Allows the
+ * caller to extend the Tracefs remote directory
+ * @load_trace_buffer: Called before Tracefs accesses the trace buffer for the first
+ * time. Must return a &trace_buffer_desc
+ * (most likely filled with trace_remote_alloc_buffer())
+ * @unload_trace_buffer:
+ * Called once Tracefs has no use for the trace buffer
+ * (most likely call trace_remote_free_buffer())
+ * @enable_tracing: Called on Tracefs tracing_on. It is expected from the
+ * remote to allow writing.
+ * @swap_reader_page: Called when Tracefs consumes a new page from a
+ * ring-buffer. It is expected from the remote to isolate a
+ * @reset: Called on `echo 0 > trace`. It is expected from the
+ * remote to reset all ring-buffer pages.
+ * new reader-page from the @cpu ring-buffer.
+ * @enable_event: Called on events/event_name/enable. It is expected from
+ * the remote to allow the writing event @id.
+ */
+struct trace_remote_callbacks {
+ int (*init)(struct dentry *d, void *priv);
+ struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
+ void (*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
+ int (*enable_tracing)(bool enable, void *priv);
+ int (*swap_reader_page)(unsigned int cpu, void *priv);
+ int (*reset)(unsigned int cpu, void *priv);
+ int (*enable_event)(unsigned short id, bool enable, void *priv);
+};
+
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
+ struct remote_event *events, size_t nr_events);
+
+int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
+ const struct cpumask *cpumask);
+
+void trace_remote_free_buffer(struct trace_buffer_desc *desc);
+
+#endif
diff --git a/include/linux/trace_remote_event.h b/include/linux/trace_remote_event.h
new file mode 100644
index 000000000000..c8ae1e1f5e72
--- /dev/null
+++ b/include/linux/trace_remote_event.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_EVENTS_H
+#define _LINUX_TRACE_REMOTE_EVENTS_H
+
+struct trace_remote;
+struct trace_event_fields;
+struct trace_seq;
+
+struct remote_event_hdr {
+ unsigned short id;
+};
+
+#define REMOTE_EVENT_NAME_MAX 30
+struct remote_event {
+ char name[REMOTE_EVENT_NAME_MAX];
+ unsigned short id;
+ bool enabled;
+ struct trace_remote *remote;
+ struct trace_event_fields *fields;
+ char *print_fmt;
+ void (*print)(void *evt, struct trace_seq *seq);
+};
+
+#define RE_STRUCT(__args...) __args
+#define re_field(__type, __field) __type __field;
+
+#define REMOTE_EVENT_FORMAT(__name, __struct) \
+ struct remote_event_format_##__name { \
+ struct remote_event_hdr hdr; \
+ __struct \
+ }
+#endif
diff --git a/include/trace/define_remote_events.h b/include/trace/define_remote_events.h
new file mode 100644
index 000000000000..676e803dc144
--- /dev/null
+++ b/include/trace/define_remote_events.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/trace_events.h>
+#include <linux/trace_remote_event.h>
+#include <linux/trace_seq.h>
+#include <linux/stringify.h>
+
+#define REMOTE_EVENT_INCLUDE(__file) __stringify(../../__file)
+
+#ifdef REMOTE_EVENT_SECTION
+# define __REMOTE_EVENT_SECTION(__name) __used __section(REMOTE_EVENT_SECTION"."#__name)
+#else
+# define __REMOTE_EVENT_SECTION(__name)
+#endif
+
+#define REMOTE_PRINTK_COUNT_ARGS(__args...) \
+ __COUNT_ARGS(, ##__args, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0)
+
+#define __remote_printk0() \
+ trace_seq_putc(seq, '\n')
+
+#define __remote_printk1(__fmt) \
+ trace_seq_puts(seq, " " __fmt "\n") \
+
+#define __remote_printk2(__fmt, __args...) \
+do { \
+ trace_seq_putc(seq, ' '); \
+ trace_seq_printf(seq, __fmt, __args); \
+ trace_seq_putc(seq, '\n'); \
+} while (0)
+
+/* Apply the appropriate trace_seq sequence according to the number of arguments */
+#define remote_printk(__args...) \
+ CONCATENATE(__remote_printk, REMOTE_PRINTK_COUNT_ARGS(__args))(__args)
+
+#define RE_PRINTK(__args...) __args
+
+#define REMOTE_EVENT(__name, __id, __struct, __printk) \
+ REMOTE_EVENT_FORMAT(__name, __struct); \
+ static void remote_event_print_##__name(void *evt, struct trace_seq *seq) \
+ { \
+ struct remote_event_format_##__name __maybe_unused *__entry = evt; \
+ trace_seq_puts(seq, #__name); \
+ remote_printk(__printk); \
+ }
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
+
+#undef REMOTE_EVENT
+#undef RE_PRINTK
+#undef re_field
+#define re_field(__type, __field) \
+ { \
+ .type = #__type, .name = #__field, \
+ .size = sizeof(__type), .align = __alignof__(__type), \
+ .is_signed = is_signed_type(__type), \
+ },
+#define __entry REC
+#define RE_PRINTK(__fmt, __args...) "\"" __fmt "\", " __stringify(__args)
+#define REMOTE_EVENT(__name, __id, __struct, __printk) \
+ static struct trace_event_fields remote_event_fields_##__name[] = { \
+ __struct \
+ {} \
+ }; \
+ static char remote_event_print_fmt_##__name[] = __printk; \
+ static struct remote_event __REMOTE_EVENT_SECTION(__name) \
+ remote_event_##__name = { \
+ .name = #__name, \
+ .id = __id, \
+ .fields = remote_event_fields_##__name, \
+ .print_fmt = remote_event_print_fmt_##__name, \
+ .print = remote_event_print_##__name, \
+ }
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7fb4fea9d38e..1cc0c9463f71 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -704,6 +704,11 @@ struct kvm_enable_cap {
#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+
+#define KVM_VM_TYPE_ARM_PROTECTED (1UL << 31)
+#define KVM_VM_TYPE_ARM_MASK (KVM_VM_TYPE_ARM_IPA_SIZE_MASK | \
+ KVM_VM_TYPE_ARM_PROTECTED)
+
/*
* ioctls for /dev/kvm fds:
*/
@@ -1227,6 +1232,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
KVM_DEV_TYPE_LOONGARCH_DMSINTC,
#define KVM_DEV_TYPE_LOONGARCH_DMSINTC KVM_DEV_TYPE_LOONGARCH_DMSINTC
+ KVM_DEV_TYPE_ARM_VGIC_V5,
+#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5
KVM_DEV_TYPE_MAX,
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index c102ef35d11e..e8185889a1c8 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -17,8 +17,8 @@
* @entries: Number of entries in the ring-buffer.
* @overrun: Number of entries lost in the ring-buffer.
* @read: Number of entries that have been read.
- * @Reserved1: Internal use only.
- * @Reserved2: Internal use only.
+ * @pages_lost: Number of pages overwritten by the writer.
+ * @pages_touched: Number of pages written by the writer.
*/
struct trace_buffer_meta {
__u32 meta_page_size;
@@ -39,8 +39,8 @@ struct trace_buffer_meta {
__u64 overrun;
__u64 read;
- __u64 Reserved1;
- __u64 Reserved2;
+ __u64 pages_lost;
+ __u64 pages_touched;
};
#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)