summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp_trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp_trace.c')
-rw-r--r-- arch/arm64/kvm/hyp_trace.c 445
1 files changed, 445 insertions, 0 deletions
diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
new file mode 100644
index 000000000000..c4b3ee552131
--- /dev/null
+++ b/arch/arm64/kvm/hyp_trace.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <linux/cpumask.h>
+#include <linux/trace_remote.h>
+#include <linux/tracefs.h>
+#include <linux/simple_ring_buffer.h>
+
+#include <asm/arch_timer.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_hyptrace.h>
+#include <asm/kvm_mmu.h>
+
+#include "hyp_trace.h"
+
+/*
+ * Maximum conversion range, in seconds, passed to clocks_calc_mult_shift().
+ * Same 10min used by clocksource when width is more than 32-bits.
+ */
+#define CLOCK_MAX_CONVERSION_S 600
+/*
+ * Time to give for the clock init. Long enough to get a good mult/shift
+ * estimation. Short enough to not delay the tracing start too much.
+ * Used as the delay of the first clock work when tracing is enabled.
+ */
+#define CLOCK_INIT_MS 100
+/*
+ * Time between clock checks. Must be small enough to catch clock deviation when
+ * it is still tiny. Used as the re-arm delay of the clock work.
+ */
+#define CLOCK_UPDATE_MS 500
+
+/*
+ * State used to keep the hypervisor trace clock aligned with the kernel boot
+ * clock. (cycles, boot) is the current epoch; mult/shift convert a cycle delta
+ * from that epoch into nanoseconds.
+ */
+static struct hyp_trace_clock {
+ u64 cycles; /* counter value captured at the epoch */
+ u64 cyc_overflow64; /* half of U64_MAX / mult: cycle delta treated as the 64-bit overflow limit */
+ u64 boot; /* boot clock, in ns, captured at the epoch */
+ u32 mult; /* cycles -> ns multiplier; 0 until the first estimation */
+ u32 shift; /* cycles -> ns shift */
+ struct delayed_work work; /* runs __hyp_clock_work() */
+ struct completion ready; /* completed each time the hypervisor is given new parameters */
+ struct mutex lock; /* NOTE(review): not used by any code in this file - confirm intent */
+ bool running; /* state last set by hyp_trace_clock_enable() */
+} hyp_clock;
+
+/*
+ * Delayed work keeping the hyp trace clock in line with the kernel boot clock.
+ *
+ * Takes a time snapshot and, when a mult/shift pair is already in use, converts
+ * the cycles elapsed since the epoch with it and compares the result with the
+ * boot clock. If both agree, the epoch is only moved forward once the cycle
+ * delta reaches half of cyc_overflow64. Otherwise (or on the first run, when
+ * mult is 0) mult/shift are recomputed from the rate measured since the epoch.
+ * Whenever the epoch moves, the parameters are passed to the hypervisor through
+ * __tracing_update_clock and ->ready is completed. The work always re-arms
+ * itself CLOCK_UPDATE_MS later.
+ */
+static void __hyp_clock_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct hyp_trace_clock *hyp_clock; /* shadows the file-scope hyp_clock */
+ struct system_time_snapshot snap;
+ u64 rate, delta_cycles;
+ u64 boot, delta_boot;
+
+ hyp_clock = container_of(dwork, struct hyp_trace_clock, work);
+
+ ktime_get_snapshot(&snap);
+ boot = ktime_to_ns(snap.boot);
+
+ delta_boot = boot - hyp_clock->boot;
+ delta_cycles = snap.cycles - hyp_clock->cycles;
+
+ /* Compare hyp clock with the kernel boot clock */
+ if (hyp_clock->mult) {
+ u64 err, cur = delta_cycles;
+
+ /* Past the overflow limit: warn once and multiply on 128 bits instead */
+ if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
+ __uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;
+
+ cur = tmp >> hyp_clock->shift;
+ } else {
+ cur *= hyp_clock->mult;
+ cur >>= hyp_clock->shift;
+ }
+ cur += hyp_clock->boot;
+
+ err = abs_diff(cur, boot);
+ /* No deviation, only update epoch if necessary */
+ if (!err) {
+ if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
+ goto fast_forward;
+
+ goto resched;
+ }
+
+ /* Warn if the error is above tracing precision (1us) */
+ if (err > NSEC_PER_USEC)
+ pr_warn_ratelimited("hyp trace clock off by %lluus\n",
+ err / NSEC_PER_USEC);
+ }
+
+ /* Cycles per second measured between the epoch and this snapshot */
+ rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);
+
+ clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
+ rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);
+
+ /* Add a comfortable 50% margin */
+ hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;
+
+fast_forward:
+ /* Move the epoch to this snapshot and hand the parameters to the hypervisor */
+ hyp_clock->cycles = snap.cycles;
+ hyp_clock->boot = boot;
+ kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
+ hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
+ complete(&hyp_clock->ready);
+
+resched:
+ schedule_delayed_work(&hyp_clock->work,
+ msecs_to_jiffies(CLOCK_UPDATE_MS));
+}
+
+/*
+ * Start or stop the periodic synchronisation of the hyp trace clock.
+ *
+ * @hyp_clock: clock state to operate on.
+ * @enable:    true to start the clock work, false to stop it.
+ *
+ * Does nothing if the clock is already in the requested state.
+ *
+ * On enable, a fresh epoch is captured, mult is reset so the first work run
+ * computes a new mult/shift pair, and the caller blocks until that first run
+ * has completed ->ready (about CLOCK_INIT_MS later).
+ *
+ * On disable, the work is cancelled synchronously and nothing is re-armed.
+ */
+static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
+{
+	struct system_time_snapshot snap;
+
+	if (hyp_clock->running == enable)
+		return;
+
+	if (!enable) {
+		cancel_delayed_work_sync(&hyp_clock->work);
+		hyp_clock->running = false;
+		/*
+		 * Stop here. Falling through would immediately re-arm the work
+		 * that was just cancelled and flag the clock as running again.
+		 */
+		return;
+	}
+
+	ktime_get_snapshot(&snap);
+
+	hyp_clock->boot = ktime_to_ns(snap.boot);
+	hyp_clock->cycles = snap.cycles;
+	/* mult == 0 makes the first work run skip the deviation check */
+	hyp_clock->mult = 0;
+
+	init_completion(&hyp_clock->ready);
+	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
+	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
+	wait_for_completion(&hyp_clock->ready);
+	hyp_clock->running = true;
+}
+
+/*
+ * Access to this struct within the trace_remote_callbacks are protected by the
+ * trace_remote lock.
+ */
+static struct hyp_trace_buffer {
+ struct hyp_trace_desc *desc; /* descriptor handed to the hypervisor; NULL when no buffer is loaded */
+ size_t desc_size; /* page-aligned size of the desc allocation */
+} trace_buffer;
+
+/*
+ * Create PAGE_HYP hypervisor mappings for [start, start + size).
+ *
+ * Nothing is mapped from here when protected KVM is enabled; 0 is returned in
+ * that case. Otherwise returns the result of create_hyp_mappings().
+ */
+static int __map_hyp(void *start, size_t size)
+{
+	void *end = start + size;
+
+	return is_protected_kvm_enabled() ? 0 : create_hyp_mappings(start, end, PAGE_HYP);
+}
+
+/*
+ * Share the memory at @va with the hypervisor by passing the one-byte range
+ * [va, va + 1) to kvm_share_hyp(). Returns kvm_share_hyp()'s result.
+ */
+static int __share_page(unsigned long va)
+{
+	void *from = (void *)va;
+
+	return kvm_share_hyp(from, from + 1);
+}
+
+/*
+ * Counterpart of __share_page(): passes the one-byte range [va, va + 1) to
+ * kvm_unshare_hyp().
+ */
+static void __unshare_page(unsigned long va)
+{
+	void *from = (void *)va;
+
+	kvm_unshare_hyp(from, from + 1);
+}
+
+/*
+ * Allocate and map the backing storage for the simple_buffer_page array.
+ *
+ * @trace_buffer: buffer whose ->desc receives the backing start and size.
+ * @size:         requested per-CPU ring-buffer size in bytes.
+ *
+ * Room is reserved for one simple_buffer_page per page of @size plus one
+ * extra, for every possible CPU, rounded up to a whole number of pages.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * __map_hyp(). On failure nothing stays allocated and ->desc is left untouched.
+ */
+static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
+{
+	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
+	size_t backing_size;
+	void *start;
+	int ret;
+
+	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
+				  num_possible_cpus());
+
+	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
+	if (!start)
+		return -ENOMEM;
+
+	ret = __map_hyp(start, backing_size);
+	if (ret) {
+		/*
+		 * The caller's unwind path for this failure does not free the
+		 * backing, so release it here to avoid leaking the pages.
+		 */
+		free_pages_exact(start, backing_size);
+		return ret;
+	}
+
+	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
+	trace_buffer->desc->bpages_backing_size = backing_size;
+
+	return 0;
+}
+
+/*
+ * Release the backing pages recorded in the descriptor by
+ * hyp_trace_buffer_alloc_bpages_backing().
+ */
+static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
+{
+	struct hyp_trace_desc *desc = trace_buffer->desc;
+
+	free_pages_exact((void *)desc->bpages_backing_start, desc->bpages_backing_size);
+}
+
+/*
+ * Unshare the meta page and every data page of each per-CPU ring buffer, for
+ * CPUs up to and including @last_cpu. Iteration stops at the first CPU above
+ * @last_cpu, so INT_MAX covers every CPU and a negative value covers none.
+ */
+static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
+{
+	struct ring_buffer_desc *rb_desc;
+	int cpu, i;
+
+	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
+		if (cpu > last_cpu)
+			break;
+
+		__unshare_page(rb_desc->meta_va);
+
+		i = 0;
+		while (i < rb_desc->nr_page_va) {
+			__unshare_page(rb_desc->page_va[i]);
+			i++;
+		}
+	}
+}
+
+/*
+ * Share the meta page and every data page of each per-CPU ring buffer with the
+ * hypervisor.
+ *
+ * Returns 0 on success. On the first __share_page() failure, everything shared
+ * so far is unshared again and that error is returned.
+ */
+static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
+{
+ struct ring_buffer_desc *rb_desc;
+ int cpu, p, ret = 0;
+
+ for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
+ ret = __share_page(rb_desc->meta_va);
+ if (ret)
+ break;
+
+ for (p = 0; p < rb_desc->nr_page_va; p++) {
+ ret = __share_page(rb_desc->page_va[p]);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ /* p is the index that failed: undo pages [0, p) and the meta page of this CPU */
+ while (--p >= 0)
+ __unshare_page(rb_desc->page_va[p]);
+ __unshare_page(rb_desc->meta_va);
+ break;
+ }
+ }
+
+ /* The failing CPU is already undone (or never started): roll back the CPUs before it */
+ if (ret)
+ hyp_trace_buffer_unshare_hyp(trace_buffer, --cpu);
+
+ return ret;
+}
+
+/*
+ * load_trace_buffer callback: allocate the trace buffer and hand it to the
+ * hypervisor.
+ *
+ * @size: requested ring-buffer size, forwarded to trace_buffer_desc_size(),
+ *        the bpages backing allocation and trace_remote_alloc_buffer().
+ * @priv: the struct hyp_trace_buffer registered with trace_remote_register().
+ *
+ * Steps, each undone in reverse order on failure: allocate and map the
+ * descriptor, allocate the bpages backing, allocate the ring-buffer pages for
+ * every possible CPU, share those pages with the hypervisor, then call
+ * __tracing_load.
+ *
+ * Returns the embedded trace_buffer_desc on success, or an ERR_PTR():
+ * -EINVAL if a buffer is already loaded, -E2BIG if the descriptor size cannot
+ * be represented, -ENOMEM on allocation failure, or the error of the failing
+ * step.
+ */
+static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
+{
+ struct hyp_trace_buffer *trace_buffer = priv;
+ struct hyp_trace_desc *desc;
+ size_t desc_size;
+ int ret;
+
+ /* A non-NULL desc means a buffer is already loaded */
+ if (WARN_ON(trace_buffer->desc))
+ return ERR_PTR(-EINVAL);
+
+ desc_size = trace_buffer_desc_size(size, num_possible_cpus());
+ if (desc_size == SIZE_MAX)
+ return ERR_PTR(-E2BIG);
+
+ desc_size = PAGE_ALIGN(desc_size);
+ desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
+ if (!desc)
+ return ERR_PTR(-ENOMEM);
+
+ ret = __map_hyp(desc, desc_size);
+ if (ret)
+ goto err_free_desc;
+
+ /* Published early: the helpers below reach the descriptor through trace_buffer */
+ trace_buffer->desc = desc;
+ trace_buffer->desc_size = desc_size;
+
+ ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
+ if (ret)
+ goto err_free_desc;
+
+ ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
+ cpu_possible_mask);
+ if (ret)
+ goto err_free_backing;
+
+ ret = hyp_trace_buffer_share_hyp(trace_buffer);
+ if (ret)
+ goto err_free_buffer;
+
+ ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
+ if (ret)
+ goto err_unload_pages;
+
+ return &desc->trace_buffer_desc;
+
+err_unload_pages:
+ /* INT_MAX: unshare the pages of every CPU */
+ hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
+
+err_free_buffer:
+ trace_remote_free_buffer(&desc->trace_buffer_desc);
+
+err_free_backing:
+ hyp_trace_buffer_free_bpages_backing(trace_buffer);
+
+err_free_desc:
+ free_pages_exact(desc, desc_size);
+ trace_buffer->desc = NULL;
+
+ return ERR_PTR(ret);
+}
+
+/*
+ * unload_trace_buffer callback: tear down what hyp_trace_load() set up.
+ *
+ * @desc: descriptor previously returned by hyp_trace_load().
+ * @priv: the struct hyp_trace_buffer registered with trace_remote_register().
+ *
+ * Warns and does nothing if @desc is not the descriptor currently loaded.
+ */
+static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
+{
+ struct hyp_trace_buffer *trace_buffer = priv;
+
+ if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
+ return;
+
+ /* Reverse order of hyp_trace_load(): hypervisor first, then pages, then descriptor */
+ kvm_call_hyp_nvhe(__tracing_unload);
+ hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
+ trace_remote_free_buffer(desc);
+ hyp_trace_buffer_free_bpages_backing(trace_buffer);
+ free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
+ trace_buffer->desc = NULL;
+ trace_buffer->desc_size = 0;
+}
+
+/*
+ * enable_tracing callback: switch the hyp clock synchronisation to the
+ * requested state, then forward the request to the hypervisor with
+ * __tracing_enable. Returns the hypercall result. @priv is unused.
+ */
+static int hyp_trace_enable_tracing(bool enable, void *priv)
+{
+ hyp_trace_clock_enable(&hyp_clock, enable);
+
+ return kvm_call_hyp_nvhe(__tracing_enable, enable);
+}
+
+/*
+ * swap_reader_page callback: forward the request for @cpu to the hypervisor
+ * with __tracing_swap_reader and return its result. @priv is unused.
+ */
+static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
+{
+ return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
+}
+
+/*
+ * reset callback: forward the request for @cpu to the hypervisor with
+ * __tracing_reset and return its result. @priv is unused.
+ */
+static int hyp_trace_reset(unsigned int cpu, void *priv)
+{
+ return kvm_call_hyp_nvhe(__tracing_reset, cpu);
+}
+
+/*
+ * enable_event callback: switch the hypervisor event @id on or off.
+ *
+ * With protected KVM the request goes to the hypervisor through
+ * __tracing_enable_event and its result is returned. Otherwise the event's
+ * 'enabled' flag is written directly from the host through a temporary
+ * single-page vmap() of the page holding it.
+ * NOTE(review): presumably the existing alias of that page is not writable,
+ * hence the temporary mapping - confirm.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary mapping cannot be created.
+ * @priv is unused.
+ */
+static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
+{
+	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
+	struct page *page;
+	atomic_t *flag;
+	void *vaddr;
+
+	if (is_protected_kvm_enabled())
+		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);
+
+	page = virt_to_page(&event_id->enabled);
+	vaddr = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
+	if (!vaddr)
+		return -ENOMEM;
+
+	/* Same offset within the page, but through the temporary mapping */
+	flag = vaddr + offset_in_page(&event_id->enabled);
+	atomic_set(flag, enable);
+
+	vunmap(vaddr);
+
+	return 0;
+}
+
+/*
+ * seq_file show handler for the "trace_clock" tracefs file: always prints
+ * "[boot]". Always returns 0.
+ */
+static int hyp_trace_clock_show(struct seq_file *m, void *v)
+{
+ seq_puts(m, "[boot]\n");
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock); /* provides hyp_trace_clock_fops, used by hyp_trace_init_tracefs() */
+
+/*
+ * Write handler for the "write_event" tracefs file.
+ *
+ * Parses the user buffer as a base-10 unsigned long and passes the value to
+ * the hypervisor through __tracing_write_event. The hypercall result is not
+ * checked.
+ *
+ * Returns @cnt on success, or the kstrtoul_from_user() error if parsing fails.
+ */
+static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
+ size_t cnt, loff_t *pos)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ kvm_call_hyp_nvhe(__tracing_write_event, val);
+
+ return cnt;
+}
+
+/* File operations of the "write_event" tracefs file: only .write is provided */
+static const struct file_operations hyp_trace_write_event_fops = {
+ .write = hyp_trace_write_event_write,
+};
+
+/*
+ * init callback: populate the tracefs directory @d with the hypervisor
+ * specific files, "write_event" (0200) and "trace_clock" (0440).
+ *
+ * Returns 0 on success, -ENOMEM if either file cannot be created.
+ * @priv is unused.
+ */
+static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
+{
+	struct dentry *file;
+
+	file = tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops);
+	if (!file)
+		return -ENOMEM;
+
+	file = tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops);
+	if (!file)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Callbacks passed to trace_remote_register() by kvm_hyp_trace_init() */
+static struct trace_remote_callbacks trace_remote_callbacks = {
+ .init = hyp_trace_init_tracefs,
+ .load_trace_buffer = hyp_trace_load,
+ .unload_trace_buffer = hyp_trace_unload,
+ .enable_tracing = hyp_trace_enable_tracing,
+ .swap_reader_page = hyp_trace_swap_reader_page,
+ .reset = hyp_trace_reset,
+ .enable_event = hyp_trace_enable_event,
+};
+
+static const char *__hyp_enter_exit_reason_str(u8 reason);
+
+#include <asm/kvm_define_hypevents.h>
+
+/*
+ * Return the printable name of a hypervisor enter/exit @reason.
+ *
+ * @reason is capped at HYP_REASON_UNKNOWN before the lookup.
+ * NOTE(review): this assumes HYP_REASON_UNKNOWN is the index of the last
+ * entry ("unknown") - confirm against the enum definition.
+ */
+static const char *__hyp_enter_exit_reason_str(u8 reason)
+{
+	static const char names[][12] = {
+		"smc",
+		"hvc",
+		"psci",
+		"host_abort",
+		"guest_exit",
+		"eret_host",
+		"eret_guest",
+		"unknown",
+	};
+	u8 idx = min(reason, HYP_REASON_UNKNOWN);
+
+	return names[idx];
+}
+
+/*
+ * Give each hypervisor event its ID: the Nth entry of the remote_event table
+ * and the Nth entry of the hyp_event_id table both receive N.
+ */
+static void __init hyp_trace_init_events(void)
+{
+	struct hyp_event_id *hyp_id = __hyp_event_ids_start;
+	struct remote_event *ev;
+	int id = 0;
+
+	/* Events are sorted on both sides, so walking the tables in step is enough */
+	for (ev = __hyp_events_start; ev < __hyp_events_end; ev++) {
+		hyp_id->id = id;
+		ev->id = hyp_id->id;
+		hyp_id++;
+		id++;
+	}
+}
+
+/*
+ * Register the "hypervisor" trace remote.
+ *
+ * Nothing is registered when the kernel itself runs in hyp mode. Registration
+ * is refused if any possible CPU has an out-of-line arch timer workaround that
+ * overrides the CNTVCT_EL0 read.
+ *
+ * Returns 0 when there is nothing to do, -EOPNOTSUPP on such a workaround, or
+ * the result of trace_remote_register().
+ */
+int __init kvm_hyp_trace_init(void)
+{
+	const struct arch_timer_erratum_workaround *wa;
+	int cpu;
+
+	if (is_kernel_in_hyp_mode())
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		wa = per_cpu(timer_unstable_counter_workaround, cpu);
+
+		if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND))
+			continue;
+		if (!wa || !wa->read_cntvct_el0)
+			continue;
+
+		pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
+		return -EOPNOTSUPP;
+	}
+
+	hyp_trace_init_events();
+
+	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
+				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
+}