summaryrefslogtreecommitdiff
path: root/kernel/trace/trace_remote.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace_remote.c')
-rw-r--r--kernel/trace/trace_remote.c1384
1 files changed, 1384 insertions, 0 deletions
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
new file mode 100644
index 000000000000..d6c3f94d67cd
--- /dev/null
+++ b/kernel/trace/trace_remote.c
@@ -0,0 +1,1384 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 - Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <linux/kstrtox.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/tracefs.h>
+#include <linux/trace_remote.h>
+#include <linux/trace_seq.h>
+#include <linux/types.h>
+
+#include "trace.h"
+
+#define TRACEFS_DIR "remotes"
+#define TRACEFS_MODE_WRITE 0640
+#define TRACEFS_MODE_READ 0440
+
+enum tri_type {
+ TRI_CONSUMING,
+ TRI_NONCONSUMING,
+};
+
+struct trace_remote_iterator {
+ struct trace_remote *remote;
+ struct trace_seq seq;
+ struct delayed_work poll_work;
+ unsigned long lost_events;
+ u64 ts;
+ struct ring_buffer_iter *rb_iter;
+ struct ring_buffer_iter **rb_iters;
+ struct remote_event_hdr *evt;
+ int cpu;
+ int evt_cpu;
+ loff_t pos;
+ enum tri_type type;
+};
+
+struct trace_remote {
+ struct trace_remote_callbacks *cbs;
+ void *priv;
+ struct trace_buffer *trace_buffer;
+ struct trace_buffer_desc *trace_buffer_desc;
+ struct dentry *dentry;
+ struct eventfs_inode *eventfs;
+ struct remote_event *events;
+ unsigned long nr_events;
+ unsigned long trace_buffer_size;
+ struct ring_buffer_remote rb_remote;
+ struct mutex lock;
+ struct rw_semaphore reader_lock;
+ struct rw_semaphore *pcpu_reader_locks;
+ unsigned int nr_readers;
+ unsigned int poll_ms;
+ bool tracing_on;
+};
+
+static bool trace_remote_loaded(struct trace_remote *remote)
+{
+ return !!remote->trace_buffer;
+}
+
+static int trace_remote_load(struct trace_remote *remote)
+{
+ struct ring_buffer_remote *rb_remote = &remote->rb_remote;
+ struct trace_buffer_desc *desc;
+
+ lockdep_assert_held(&remote->lock);
+
+ if (trace_remote_loaded(remote))
+ return 0;
+
+ desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
+ if (IS_ERR(desc))
+ return PTR_ERR(desc);
+
+ rb_remote->desc = desc;
+ rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
+ rb_remote->priv = remote->priv;
+ rb_remote->reset = remote->cbs->reset;
+ remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
+ if (!remote->trace_buffer) {
+ remote->cbs->unload_trace_buffer(desc, remote->priv);
+ return -ENOMEM;
+ }
+
+ remote->trace_buffer_desc = desc;
+
+ return 0;
+}
+
+static void trace_remote_try_unload(struct trace_remote *remote)
+{
+ lockdep_assert_held(&remote->lock);
+
+ if (!trace_remote_loaded(remote))
+ return;
+
+ /* The buffer is being read or writable */
+ if (remote->nr_readers || remote->tracing_on)
+ return;
+
+ /* The buffer has readable data */
+ if (!ring_buffer_empty(remote->trace_buffer))
+ return;
+
+ ring_buffer_free(remote->trace_buffer);
+ remote->trace_buffer = NULL;
+ remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
+}
+
+static int trace_remote_enable_tracing(struct trace_remote *remote)
+{
+ int ret;
+
+ lockdep_assert_held(&remote->lock);
+
+ if (remote->tracing_on)
+ return 0;
+
+ ret = trace_remote_load(remote);
+ if (ret)
+ return ret;
+
+ ret = remote->cbs->enable_tracing(true, remote->priv);
+ if (ret) {
+ trace_remote_try_unload(remote);
+ return ret;
+ }
+
+ remote->tracing_on = true;
+
+ return 0;
+}
+
+static int trace_remote_disable_tracing(struct trace_remote *remote)
+{
+ int ret;
+
+ lockdep_assert_held(&remote->lock);
+
+ if (!remote->tracing_on)
+ return 0;
+
+ ret = remote->cbs->enable_tracing(false, remote->priv);
+ if (ret)
+ return ret;
+
+ ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
+ remote->tracing_on = false;
+ trace_remote_try_unload(remote);
+
+ return 0;
+}
+
+static void trace_remote_reset(struct trace_remote *remote, int cpu)
+{
+ lockdep_assert_held(&remote->lock);
+
+ if (!trace_remote_loaded(remote))
+ return;
+
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ ring_buffer_reset(remote->trace_buffer);
+ else
+ ring_buffer_reset_cpu(remote->trace_buffer, cpu);
+
+ trace_remote_try_unload(remote);
+}
+
+static ssize_t
+tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct seq_file *seq = filp->private_data;
+ struct trace_remote *remote = seq->private;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ guard(mutex)(&remote->lock);
+
+ ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
+ if (ret)
+ return ret;
+
+ return cnt;
+}
+static int tracing_on_show(struct seq_file *s, void *unused)
+{
+ struct trace_remote *remote = s->private;
+
+ seq_printf(s, "%d\n", remote->tracing_on);
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
+
+static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct seq_file *seq = filp->private_data;
+ struct trace_remote *remote = seq->private;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ /* KiB to Bytes */
+ if (!val || check_shl_overflow(val, 10, &val))
+ return -EINVAL;
+
+ guard(mutex)(&remote->lock);
+
+ if (trace_remote_loaded(remote))
+ return -EBUSY;
+
+ remote->trace_buffer_size = val;
+
+ return cnt;
+}
+
+static int buffer_size_kb_show(struct seq_file *s, void *unused)
+{
+ struct trace_remote *remote = s->private;
+
+ seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
+ trace_remote_loaded(remote) ? "loaded" : "unloaded");
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
+
+static int trace_remote_get(struct trace_remote *remote, int cpu)
+{
+ int ret;
+
+ if (remote->nr_readers == UINT_MAX)
+ return -EBUSY;
+
+ ret = trace_remote_load(remote);
+ if (ret)
+ return ret;
+
+ if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
+ int lock_cpu;
+
+ remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
+ GFP_KERNEL);
+ if (!remote->pcpu_reader_locks) {
+ trace_remote_try_unload(remote);
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(lock_cpu)
+ init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
+ }
+
+ remote->nr_readers++;
+
+ return 0;
+}
+
+static void trace_remote_put(struct trace_remote *remote)
+{
+ if (WARN_ON(!remote->nr_readers))
+ return;
+
+ remote->nr_readers--;
+ if (remote->nr_readers)
+ return;
+
+ kfree(remote->pcpu_reader_locks);
+ remote->pcpu_reader_locks = NULL;
+
+ trace_remote_try_unload(remote);
+}
+
+static bool trace_remote_has_cpu(struct trace_remote *remote, int cpu)
+{
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return true;
+
+ return ring_buffer_poll_remote(remote->trace_buffer, cpu) == 0;
+}
+
+static void __poll_remote(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct trace_remote_iterator *iter;
+
+ iter = container_of(dwork, struct trace_remote_iterator, poll_work);
+ ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
+ schedule_delayed_work((struct delayed_work *)work,
+ msecs_to_jiffies(iter->remote->poll_ms));
+}
+
+static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ ring_buffer_read_finish(iter->rb_iter);
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ if (iter->rb_iters[cpu])
+ ring_buffer_read_finish(iter->rb_iters[cpu]);
+ }
+
+ kfree(iter->rb_iters);
+}
+
+static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
+
+ return iter->rb_iter ? 0 : -ENOMEM;
+ }
+
+ iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
+ if (!iter->rb_iters)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
+ GFP_KERNEL);
+ if (!iter->rb_iters[cpu]) {
+ /* This CPU isn't part of trace_buffer. Skip it */
+ if (!trace_remote_has_cpu(iter->remote, cpu))
+ continue;
+
+ __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static struct trace_remote_iterator
+*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
+{
+ struct trace_remote_iterator *iter = NULL;
+ int ret;
+
+ lockdep_assert_held(&remote->lock);
+
+ if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
+ return NULL;
+
+ ret = trace_remote_get(remote, cpu);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!trace_remote_has_cpu(remote, cpu)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ iter = kzalloc_obj(*iter);
+ if (iter) {
+ iter->remote = remote;
+ iter->cpu = cpu;
+ iter->type = type;
+ trace_seq_init(&iter->seq);
+
+ switch (type) {
+ case TRI_CONSUMING:
+ ring_buffer_poll_remote(remote->trace_buffer, cpu);
+ INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
+ schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+ break;
+ case TRI_NONCONSUMING:
+ ret = __alloc_ring_buffer_iter(iter, cpu);
+ break;
+ }
+
+ if (ret)
+ goto err;
+
+ return iter;
+ }
+ ret = -ENOMEM;
+
+err:
+ kfree(iter);
+ trace_remote_put(remote);
+
+ return ERR_PTR(ret);
+}
+
+static void trace_remote_iter_free(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote;
+
+ if (!iter)
+ return;
+
+ remote = iter->remote;
+
+ lockdep_assert_held(&remote->lock);
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ cancel_delayed_work_sync(&iter->poll_work);
+ break;
+ case TRI_NONCONSUMING:
+ __free_ring_buffer_iter(iter, iter->cpu);
+ break;
+ }
+
+ kfree(iter);
+ trace_remote_put(remote);
+}
+
+static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote = iter->remote;
+ int cpu = iter->cpu;
+
+ /* Acquire global reader lock */
+ if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+ down_write(&remote->reader_lock);
+ else
+ down_read(&remote->reader_lock);
+
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return;
+
+ /*
+ * No need for the remote lock here, iter holds a reference on
+ * remote->nr_readers
+ */
+
+ /* Get the per-CPU one */
+ if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
+ return;
+
+ if (iter->type == TRI_CONSUMING)
+ down_write(&remote->pcpu_reader_locks[cpu]);
+ else
+ down_read(&remote->pcpu_reader_locks[cpu]);
+}
+
+static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote = iter->remote;
+ int cpu = iter->cpu;
+
+ /* Release per-CPU reader lock */
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ /*
+ * No need for the remote lock here, iter holds a reference on
+ * remote->nr_readers
+ */
+ if (iter->type == TRI_CONSUMING)
+ up_write(&remote->pcpu_reader_locks[cpu]);
+ else
+ up_read(&remote->pcpu_reader_locks[cpu]);
+ }
+
+ /* Release global reader lock */
+ if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+ up_write(&remote->reader_lock);
+ else
+ up_read(&remote->reader_lock);
+}
+
+static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
+}
+
+static struct ring_buffer_event *
+__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
+{
+ struct ring_buffer_event *rb_evt;
+ struct ring_buffer_iter *rb_iter;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
+ case TRI_NONCONSUMING:
+ rb_iter = __get_rb_iter(iter, cpu);
+ if (!rb_iter)
+ return NULL;
+
+ rb_evt = ring_buffer_iter_peek(rb_iter, ts);
+ if (!rb_evt)
+ return NULL;
+
+ *lost_events = ring_buffer_iter_dropped(rb_iter);
+
+ return rb_evt;
+ }
+
+ return NULL;
+}
+
+static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
+{
+ struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+ struct ring_buffer_event *rb_evt;
+ int cpu = iter->cpu;
+
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ if (ring_buffer_empty_cpu(trace_buffer, cpu))
+ return false;
+
+ rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events);
+ if (!rb_evt)
+ return false;
+
+ iter->evt_cpu = cpu;
+ iter->evt = ring_buffer_event_data(rb_evt);
+ return true;
+ }
+
+ iter->ts = U64_MAX;
+ for_each_possible_cpu(cpu) {
+ unsigned long lost_events;
+ u64 ts;
+
+ if (ring_buffer_empty_cpu(trace_buffer, cpu))
+ continue;
+
+ rb_evt = __peek_event(iter, cpu, &ts, &lost_events);
+ if (!rb_evt)
+ continue;
+
+ if (ts >= iter->ts)
+ continue;
+
+ iter->ts = ts;
+ iter->evt_cpu = cpu;
+ iter->evt = ring_buffer_event_data(rb_evt);
+ iter->lost_events = lost_events;
+ }
+
+ return iter->ts != U64_MAX;
+}
+
+static void trace_remote_iter_move(struct trace_remote_iterator *iter)
+{
+ struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+ break;
+ case TRI_NONCONSUMING:
+ ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
+ break;
+ }
+}
+
+static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id);
+
+static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
+{
+ struct remote_event *evt;
+ unsigned long usecs_rem;
+ u64 ts = iter->ts;
+
+ if (iter->lost_events)
+ trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+ iter->evt_cpu, iter->lost_events);
+
+ do_div(ts, 1000);
+ usecs_rem = do_div(ts, USEC_PER_SEC);
+
+ trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
+ ts, usecs_rem);
+
+ evt = trace_remote_find_event(iter->remote, iter->evt->id);
+ if (!evt)
+ trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id);
+ else
+ evt->print(iter->evt, &iter->seq);
+
+ return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
+}
+
+static int trace_pipe_open(struct inode *inode, struct file *filp)
+{
+ struct trace_remote *remote = inode->i_private;
+ struct trace_remote_iterator *iter;
+ int cpu = tracing_get_cpu(inode);
+
+ guard(mutex)(&remote->lock);
+
+ iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ filp->private_data = iter;
+
+ return IS_ERR(iter) ? PTR_ERR(iter) : 0;
+}
+
+static int trace_pipe_release(struct inode *inode, struct file *filp)
+{
+ struct trace_remote_iterator *iter = filp->private_data;
+ struct trace_remote *remote = iter->remote;
+
+ guard(mutex)(&remote->lock);
+
+ trace_remote_iter_free(iter);
+
+ return 0;
+}
+
+static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct trace_remote_iterator *iter = filp->private_data;
+ struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+ int ret;
+
+copy_to_user:
+ ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (ret != -EBUSY)
+ return ret;
+
+ trace_seq_init(&iter->seq);
+
+ ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ trace_remote_iter_read_start(iter);
+
+ while (trace_remote_iter_read_event(iter)) {
+ int prev_len = iter->seq.seq.len;
+
+ if (trace_remote_iter_print_event(iter)) {
+ iter->seq.seq.len = prev_len;
+ break;
+ }
+
+ trace_remote_iter_move(iter);
+ }
+
+ trace_remote_iter_read_finished(iter);
+
+ goto copy_to_user;
+}
+
+static const struct file_operations trace_pipe_fops = {
+ .open = trace_pipe_open,
+ .read = trace_pipe_read,
+ .release = trace_pipe_release,
+};
+
+static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_remote_iterator *iter = m->private;
+
+ ++*pos;
+
+ if (!iter || !trace_remote_iter_read_event(iter))
+ return NULL;
+
+ trace_remote_iter_move(iter);
+ iter->pos++;
+
+ return iter;
+}
+
+static void *trace_start(struct seq_file *m, loff_t *pos)
+{
+ struct trace_remote_iterator *iter = m->private;
+ loff_t i;
+
+ if (!iter)
+ return NULL;
+
+ trace_remote_iter_read_start(iter);
+
+ if (!*pos) {
+ iter->pos = -1;
+ return trace_next(m, NULL, &i);
+ }
+
+ i = iter->pos;
+ while (i < *pos) {
+ iter = trace_next(m, NULL, &i);
+ if (!iter)
+ return NULL;
+ }
+
+ return iter;
+}
+
+static int trace_show(struct seq_file *m, void *v)
+{
+ struct trace_remote_iterator *iter = v;
+
+ trace_seq_init(&iter->seq);
+
+ if (trace_remote_iter_print_event(iter)) {
+ seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
+ return 0;
+ }
+
+ return trace_print_seq(m, &iter->seq);
+}
+
+static void trace_stop(struct seq_file *m, void *v)
+{
+ struct trace_remote_iterator *iter = m->private;
+
+ if (iter)
+ trace_remote_iter_read_finished(iter);
+}
+
+static const struct seq_operations trace_sops = {
+ .start = trace_start,
+ .next = trace_next,
+ .show = trace_show,
+ .stop = trace_stop,
+};
+
+static int trace_open(struct inode *inode, struct file *filp)
+{
+ struct trace_remote *remote = inode->i_private;
+ struct trace_remote_iterator *iter = NULL;
+ int cpu = tracing_get_cpu(inode);
+ int ret;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ guard(mutex)(&remote->lock);
+
+ iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = seq_open(filp, &trace_sops);
+ if (ret) {
+ trace_remote_iter_free(iter);
+ return ret;
+ }
+
+ ((struct seq_file *)filp->private_data)->private = (void *)iter;
+
+ return 0;
+}
+
+static int trace_release(struct inode *inode, struct file *filp)
+{
+ struct trace_remote_iterator *iter;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ iter = ((struct seq_file *)filp->private_data)->private;
+ seq_release(inode, filp);
+
+ if (!iter)
+ return 0;
+
+ guard(mutex)(&iter->remote->lock);
+
+ trace_remote_iter_free(iter);
+
+ return 0;
+}
+
+static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct inode *inode = file_inode(filp);
+ struct trace_remote *remote = inode->i_private;
+ int cpu = tracing_get_cpu(inode);
+
+ guard(mutex)(&remote->lock);
+
+ trace_remote_reset(remote, cpu);
+
+ return cnt;
+}
+
+static const struct file_operations trace_fops = {
+ .open = trace_open,
+ .write = trace_write,
+ .read = seq_read,
+ .read_iter = seq_read_iter,
+ .release = trace_release,
+};
+
+static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
+{
+ struct dentry *remote_d, *percpu_d, *d;
+ static struct dentry *root;
+ static DEFINE_MUTEX(lock);
+ bool root_inited = false;
+ int cpu;
+
+ guard(mutex)(&lock);
+
+ if (!root) {
+ root = tracefs_create_dir(TRACEFS_DIR, NULL);
+ if (!root) {
+ pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
+ return -ENOMEM;
+ }
+ root_inited = true;
+ }
+
+ remote_d = tracefs_create_dir(name, root);
+ if (!remote_d) {
+ pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
+ goto err;
+ }
+
+ d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
+ if (!d)
+ goto err;
+
+ d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
+ &buffer_size_kb_fops);
+ if (!d)
+ goto err;
+
+ d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
+ if (!d)
+ goto err;
+
+ d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
+ if (!d)
+ goto err;
+
+ percpu_d = tracefs_create_dir("per_cpu", remote_d);
+ if (!percpu_d) {
+ pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
+ goto err;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct dentry *cpu_d;
+ char cpu_name[16];
+
+ snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
+ cpu_d = tracefs_create_dir(cpu_name, percpu_d);
+ if (!cpu_d) {
+ pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
+ name, cpu);
+ goto err;
+ }
+
+ d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
+ &trace_pipe_fops);
+ if (!d)
+ goto err;
+
+ d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
+ &trace_fops);
+ if (!d)
+ goto err;
+ }
+
+ remote->dentry = remote_d;
+
+ return 0;
+
+err:
+ if (root_inited) {
+ tracefs_remove(root);
+ root = NULL;
+ } else {
+ tracefs_remove(remote_d);
+ }
+
+ return -ENOMEM;
+}
+
+static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
+ struct remote_event *events, size_t nr_events);
+
+/**
+ * trace_remote_register() - Register a Tracefs remote
+ * @name: Name of the remote, used for the Tracefs remotes/ directory.
+ * @cbs: Set of callbacks used to control the remote.
+ * @priv: Private data, passed to each callback from @cbs.
+ * @events: Array of events. &remote_event.name and &remote_event.id must be
+ * filled by the caller.
+ * @nr_events: Number of events in the @events array.
+ *
+ * A trace remote is an entity, outside of the kernel (most likely firmware or
+ * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
+ * The kernel would then act as a reader.
+ *
+ * The registered remote will be found under the Tracefs directory
+ * remotes/<name>.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
+ struct remote_event *events, size_t nr_events)
+{
+ struct trace_remote *remote;
+ int ret;
+
+ remote = kzalloc_obj(*remote);
+ if (!remote)
+ return -ENOMEM;
+
+ remote->cbs = cbs;
+ remote->priv = priv;
+ remote->trace_buffer_size = 7 << 10;
+ remote->poll_ms = 100;
+ mutex_init(&remote->lock);
+ init_rwsem(&remote->reader_lock);
+
+ if (trace_remote_init_tracefs(name, remote)) {
+ kfree(remote);
+ return -ENOMEM;
+ }
+
+ ret = trace_remote_register_events(name, remote, events, nr_events);
+ if (ret) {
+ pr_err("Failed to register events for trace remote '%s' (%d)\n",
+ name, ret);
+ return ret;
+ }
+
+ ret = cbs->init ? cbs->init(remote->dentry, priv) : 0;
+ if (ret)
+ pr_err("Init failed for trace remote '%s' (%d)\n", name, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(trace_remote_register);
+
+/**
+ * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
+ * @desc: Descriptor of the per-CPU ring-buffers, originally filled by
+ * trace_remote_alloc_buffer()
+ *
+ * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
+ */
+void trace_remote_free_buffer(struct trace_buffer_desc *desc)
+{
+ struct ring_buffer_desc *rb_desc;
+ int cpu;
+
+ for_each_ring_buffer_desc(rb_desc, cpu, desc) {
+ unsigned int id;
+
+ free_page(rb_desc->meta_va);
+
+ for (id = 0; id < rb_desc->nr_page_va; id++)
+ free_page(rb_desc->page_va[id]);
+ }
+}
+EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
+
+/**
+ * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
+ * @desc: Uninitialized trace_buffer_desc
+ * @desc_size: Size of the trace_buffer_desc. Must be at least equal to
+ * trace_buffer_desc_size()
+ * @buffer_size: Size in bytes of each per-CPU ring-buffer
+ * @cpumask: CPUs to allocate a ring-buffer for
+ *
+ * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
+ * for each CPU from @cpumask and fill @desc. Most likely called from
+ * &trace_remote_callbacks.load_trace_buffer.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
+ const struct cpumask *cpumask)
+{
+ unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+ void *desc_end = desc + desc_size;
+ struct ring_buffer_desc *rb_desc;
+ int cpu, ret = -ENOMEM;
+
+ if (desc_size < struct_size(desc, __data, 0))
+ return -EINVAL;
+
+ desc->nr_cpus = 0;
+ desc->struct_len = struct_size(desc, __data, 0);
+
+ rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
+
+ for_each_cpu(cpu, cpumask) {
+ unsigned int id;
+
+ if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ rb_desc->cpu = cpu;
+ rb_desc->nr_page_va = 0;
+ rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
+ if (!rb_desc->meta_va)
+ goto err;
+
+ for (id = 0; id < nr_pages; id++) {
+ rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
+ if (!rb_desc->page_va[id])
+ goto err;
+
+ rb_desc->nr_page_va++;
+ }
+ desc->nr_cpus++;
+ desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
+ desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
+ rb_desc = __next_ring_buffer_desc(rb_desc);
+ }
+
+ return 0;
+
+err:
+ trace_remote_free_buffer(desc);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
+
+static int
+trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable)
+{
+ int ret;
+
+ lockdep_assert_held(&remote->lock);
+
+ if (evt->enabled == enable)
+ return 0;
+
+ ret = remote->cbs->enable_event(evt->id, enable, remote->priv);
+ if (ret)
+ return ret;
+
+ evt->enabled = enable;
+
+ return 0;
+}
+
+static int remote_event_enable_show(struct seq_file *s, void *unused)
+{
+ struct remote_event *evt = s->private;
+
+ seq_printf(s, "%d\n", evt->enabled);
+
+ return 0;
+}
+
+static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *seq = filp->private_data;
+ struct remote_event *evt = seq->private;
+ struct trace_remote *remote = evt->remote;
+ u8 enable;
+ int ret;
+
+ ret = kstrtou8_from_user(ubuf, count, 10, &enable);
+ if (ret)
+ return ret;
+
+ guard(mutex)(&remote->lock);
+
+ ret = trace_remote_enable_event(remote, evt, enable);
+ if (ret)
+ return ret;
+
+ return count;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable);
+
+static int remote_event_id_show(struct seq_file *s, void *unused)
+{
+ struct remote_event *evt = s->private;
+
+ seq_printf(s, "%d\n", evt->id);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(remote_event_id);
+
+static int remote_event_format_show(struct seq_file *s, void *unused)
+{
+ size_t offset = sizeof(struct remote_event_hdr);
+ struct remote_event *evt = s->private;
+ struct trace_event_fields *field;
+
+ seq_printf(s, "name: %s\n", evt->name);
+ seq_printf(s, "ID: %d\n", evt->id);
+ seq_puts(s,
+ "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n");
+
+ field = &evt->fields[0];
+ while (field->name) {
+ seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, offset, field->size,
+ field->is_signed);
+ offset += field->size;
+ field++;
+ }
+
+ if (field != &evt->fields[0])
+ seq_puts(s, "\n");
+
+ seq_printf(s, "print fmt: %s\n", evt->print_fmt);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(remote_event_format);
+
+static int remote_event_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ if (!strcmp(name, "enable")) {
+ *mode = TRACEFS_MODE_WRITE;
+ *fops = &remote_event_enable_fops;
+ return 1;
+ }
+
+ if (!strcmp(name, "id")) {
+ *mode = TRACEFS_MODE_READ;
+ *fops = &remote_event_id_fops;
+ return 1;
+ }
+
+ if (!strcmp(name, "format")) {
+ *mode = TRACEFS_MODE_READ;
+ *fops = &remote_event_format_fops;
+ return 1;
+ }
+
+ return 0;
+}
+
+static ssize_t remote_events_dir_enable_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_remote *remote = file_inode(filp)->i_private;
+ int i, ret;
+ u8 enable;
+
+ ret = kstrtou8_from_user(ubuf, count, 10, &enable);
+ if (ret)
+ return ret;
+
+ guard(mutex)(&remote->lock);
+
+ for (i = 0; i < remote->nr_events; i++) {
+ struct remote_event *evt = &remote->events[i];
+
+ trace_remote_enable_event(remote, evt, enable);
+ }
+
+ return count;
+}
+
+static ssize_t remote_events_dir_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct trace_remote *remote = file_inode(filp)->i_private;
+ const char enabled_char[] = {'0', '1', 'X'};
+ char enabled_str[] = " \n";
+ int i, enabled = -1;
+
+ guard(mutex)(&remote->lock);
+
+ for (i = 0; i < remote->nr_events; i++) {
+ struct remote_event *evt = &remote->events[i];
+
+ if (enabled == -1) {
+ enabled = evt->enabled;
+ } else if (enabled != evt->enabled) {
+ enabled = 2;
+ break;
+ }
+ }
+
+ enabled_str[0] = enabled_char[enabled == -1 ? 0 : enabled];
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, enabled_str, 2);
+}
+
+static const struct file_operations remote_events_dir_enable_fops = {
+ .write = remote_events_dir_enable_write,
+ .read = remote_events_dir_enable_read,
+};
+
+static ssize_t
+remote_events_dir_header_page_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct trace_seq *s;
+ int ret;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ ring_buffer_print_page_header(NULL, s);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
+ kfree(s);
+
+ return ret;
+}
+
+static const struct file_operations remote_events_dir_header_page_fops = {
+ .read = remote_events_dir_header_page_read,
+};
+
+static ssize_t
+remote_events_dir_header_event_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct trace_seq *s;
+ int ret;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ ring_buffer_print_entry_header(s);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
+ kfree(s);
+
+ return ret;
+}
+
+static const struct file_operations remote_events_dir_header_event_fops = {
+ .read = remote_events_dir_header_event_read,
+};
+
+static int remote_events_dir_callback(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops)
+{
+ if (!strcmp(name, "enable")) {
+ *mode = TRACEFS_MODE_WRITE;
+ *fops = &remote_events_dir_enable_fops;
+ return 1;
+ }
+
+ if (!strcmp(name, "header_page")) {
+ *mode = TRACEFS_MODE_READ;
+ *fops = &remote_events_dir_header_page_fops;
+ return 1;
+ }
+
+ if (!strcmp(name, "header_event")) {
+ *mode = TRACEFS_MODE_READ;
+ *fops = &remote_events_dir_header_event_fops;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote,
+ struct remote_event *evt)
+{
+ struct eventfs_inode *eventfs = remote->eventfs;
+ static struct eventfs_entry dir_entries[] = {
+ {
+ .name = "enable",
+ .callback = remote_events_dir_callback,
+ }, {
+ .name = "header_page",
+ .callback = remote_events_dir_callback,
+ }, {
+ .name = "header_event",
+ .callback = remote_events_dir_callback,
+ }
+ };
+ static struct eventfs_entry entries[] = {
+ {
+ .name = "enable",
+ .callback = remote_event_callback,
+ }, {
+ .name = "id",
+ .callback = remote_event_callback,
+ }, {
+ .name = "format",
+ .callback = remote_event_callback,
+ }
+ };
+ bool eventfs_create = false;
+
+ if (!eventfs) {
+ eventfs = eventfs_create_events_dir("events", remote->dentry, dir_entries,
+ ARRAY_SIZE(dir_entries), remote);
+ if (IS_ERR(eventfs))
+ return PTR_ERR(eventfs);
+
+ /*
+ * Create similar hierarchy as local events even if a single system is supported at
+ * the moment
+ */
+ eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL);
+ if (IS_ERR(eventfs))
+ return PTR_ERR(eventfs);
+
+ remote->eventfs = eventfs;
+ eventfs_create = true;
+ }
+
+ eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt);
+ if (IS_ERR(eventfs)) {
+ if (eventfs_create) {
+ eventfs_remove_events_dir(remote->eventfs);
+ remote->eventfs = NULL;
+ }
+ return PTR_ERR(eventfs);
+ }
+
+ return 0;
+}
+
+static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events,
+ size_t nr_events)
+{
+ int i;
+
+ for (i = 0; i < nr_events; i++) {
+ struct remote_event *evt = &events[i];
+
+ if (evt->remote)
+ return -EEXIST;
+
+ evt->remote = remote;
+
+ /* We need events to be sorted for efficient lookup */
+ if (i && evt->id <= events[i - 1].id)
+ return -EINVAL;
+ }
+
+ remote->events = events;
+ remote->nr_events = nr_events;
+
+ return 0;
+}
+
+static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
+ struct remote_event *events, size_t nr_events)
+{
+ int i, ret;
+
+ ret = trace_remote_attach_events(remote, events, nr_events);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nr_events; i++) {
+ struct remote_event *evt = &events[i];
+
+ ret = trace_remote_init_eventfs(remote_name, remote, evt);
+ if (ret)
+ pr_warn("Failed to init eventfs for event '%s' (%d)",
+ evt->name, ret);
+ }
+
+ return 0;
+}
+
+static int __cmp_events(const void *key, const void *data)
+{
+ const struct remote_event *evt = data;
+ int id = (int)((long)key);
+
+ return id - (int)evt->id;
+}
+
+static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id)
+{
+ return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events,
+ sizeof(*remote->events), __cmp_events);
+}