summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c1
-rw-r--r--kernel/bpf/cpumap.c146
-rw-r--r--kernel/bpf/offload.c11
-rw-r--r--kernel/capability.c16
-rw-r--r--kernel/cpu.c1
-rw-r--r--kernel/events/core.c14
-rw-r--r--kernel/irq/Kconfig5
-rw-r--r--kernel/irq/chip.c47
-rw-r--r--kernel/irq/msi.c163
-rw-r--r--kernel/kexec_core.c4
-rw-r--r--kernel/livepatch/core.c9
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/power/Kconfig3
-rw-r--r--kernel/power/energy_model.c67
-rw-r--r--kernel/power/hibernate.c27
-rw-r--r--kernel/power/snapshot.c16
-rw-r--r--kernel/power/suspend.c22
-rw-r--r--kernel/power/swap.c58
-rw-r--r--kernel/printk/internal.h1
-rw-r--r--kernel/printk/printk.c55
-rw-r--r--kernel/printk/printk_ringbuffer.c13
-rw-r--r--kernel/sched/core.c23
-rw-r--r--kernel/static_call_inline.c2
-rw-r--r--kernel/sysctl.c239
-rw-r--r--kernel/time/posix-clock.c3
-rw-r--r--kernel/trace/Kconfig12
-rw-r--r--kernel/trace/fgraph.c2
-rw-r--r--kernel/trace/ftrace.c57
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/rv/Kconfig7
-rw-r--r--kernel/trace/rv/Makefile7
-rw-r--r--kernel/trace/rv/monitors/sched/Kconfig11
-rw-r--r--kernel/trace/rv/monitors/sched/sched.c38
-rw-r--r--kernel/trace/rv/monitors/sched/sched.h3
-rw-r--r--kernel/trace/rv/monitors/sco/Kconfig14
-rw-r--r--kernel/trace/rv/monitors/sco/sco.c88
-rw-r--r--kernel/trace/rv/monitors/sco/sco.h47
-rw-r--r--kernel/trace/rv/monitors/sco/sco_trace.h15
-rw-r--r--kernel/trace/rv/monitors/scpd/Kconfig15
-rw-r--r--kernel/trace/rv/monitors/scpd/scpd.c96
-rw-r--r--kernel/trace/rv/monitors/scpd/scpd.h49
-rw-r--r--kernel/trace/rv/monitors/scpd/scpd_trace.h15
-rw-r--r--kernel/trace/rv/monitors/sncid/Kconfig15
-rw-r--r--kernel/trace/rv/monitors/sncid/sncid.c96
-rw-r--r--kernel/trace/rv/monitors/sncid/sncid.h49
-rw-r--r--kernel/trace/rv/monitors/sncid/sncid_trace.h15
-rw-r--r--kernel/trace/rv/monitors/snep/Kconfig15
-rw-r--r--kernel/trace/rv/monitors/snep/snep.c96
-rw-r--r--kernel/trace/rv/monitors/snep/snep.h49
-rw-r--r--kernel/trace/rv/monitors/snep/snep_trace.h15
-rw-r--r--kernel/trace/rv/monitors/snroc/Kconfig14
-rw-r--r--kernel/trace/rv/monitors/snroc/snroc.c85
-rw-r--r--kernel/trace/rv/monitors/snroc/snroc.h47
-rw-r--r--kernel/trace/rv/monitors/snroc/snroc_trace.h15
-rw-r--r--kernel/trace/rv/monitors/tss/Kconfig14
-rw-r--r--kernel/trace/rv/monitors/tss/tss.c91
-rw-r--r--kernel/trace/rv/monitors/tss/tss.h47
-rw-r--r--kernel/trace/rv/monitors/tss/tss_trace.h15
-rw-r--r--kernel/trace/rv/monitors/wip/Kconfig2
-rw-r--r--kernel/trace/rv/monitors/wip/wip.c2
-rw-r--r--kernel/trace/rv/monitors/wip/wip.h1
-rw-r--r--kernel/trace/rv/monitors/wwnr/Kconfig2
-rw-r--r--kernel/trace/rv/monitors/wwnr/wwnr.c2
-rw-r--r--kernel/trace/rv/monitors/wwnr/wwnr.h1
-rw-r--r--kernel/trace/rv/rv.c154
-rw-r--r--kernel/trace/rv/rv.h4
-rw-r--r--kernel/trace/rv/rv_reactors.c28
-rw-r--r--kernel/trace/rv/rv_trace.h6
-rw-r--r--kernel/trace/trace.c25
-rw-r--r--kernel/trace/trace.h9
-rw-r--r--kernel/trace/trace_entries.h12
-rw-r--r--kernel/trace/trace_eprobe.c8
-rw-r--r--kernel/trace/trace_event_perf.c4
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--kernel/trace/trace_events_hist.c20
-rw-r--r--kernel/trace/trace_events_synth.c40
-rw-r--r--kernel/trace/trace_events_trigger.c38
-rw-r--r--kernel/trace/trace_events_user.c7
-rw-r--r--kernel/trace/trace_fprobe.c5
-rw-r--r--kernel/trace/trace_functions.c46
-rw-r--r--kernel/trace/trace_functions_graph.c176
-rw-r--r--kernel/trace/trace_irqsoff.c14
-rw-r--r--kernel/trace/trace_kprobe.c5
-rw-r--r--kernel/trace/trace_osnoise.c56
-rw-r--r--kernel/trace/trace_output.c122
-rw-r--r--kernel/trace/trace_output.h9
-rw-r--r--kernel/trace/trace_probe.c28
-rw-r--r--kernel/trace/trace_probe.h1
-rw-r--r--kernel/trace/trace_sched_wakeup.c6
-rw-r--r--kernel/trace/trace_uprobe.c9
-rw-r--r--kernel/tracepoint.c2
91 files changed, 2216 insertions, 735 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c3223e0db2f5..eacb701bc2be 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -8526,6 +8526,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_SOCK_OPS:
return BTF_KFUNC_HOOK_CGROUP;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 774accbd4a22..67e8a2fc1a99 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -33,8 +33,8 @@
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>
-#include <linux/netdevice.h> /* netif_receive_skb_list */
-#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/netdevice.h>
+#include <net/gro.h>
/* General idea: XDP packets getting XDP redirected to another CPU,
* will maximum be stored/queued for one driver ->poll() call. It is
@@ -68,6 +68,7 @@ struct bpf_cpu_map_entry {
struct bpf_cpumap_val value;
struct bpf_prog *prog;
+ struct gro_node gro;
struct completion kthread_running;
struct rcu_work free_work;
@@ -133,22 +134,23 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
}
}
-static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
- struct list_head *listp,
- struct xdp_cpumap_stats *stats)
+static u32 cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+ void **skbs, u32 skb_n,
+ struct xdp_cpumap_stats *stats)
{
- struct sk_buff *skb, *tmp;
struct xdp_buff xdp;
- u32 act;
+ u32 act, pass = 0;
int err;
- list_for_each_entry_safe(skb, tmp, listp, list) {
+ for (u32 i = 0; i < skb_n; i++) {
+ struct sk_buff *skb = skbs[i];
+
act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
switch (act) {
case XDP_PASS:
+ skbs[pass++] = skb;
break;
case XDP_REDIRECT:
- skb_list_del_init(skb);
err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
rcpu->prog);
if (unlikely(err)) {
@@ -157,7 +159,7 @@ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
} else {
stats->redirect++;
}
- return;
+ break;
default:
bpf_warn_invalid_xdp_action(NULL, rcpu->prog, act);
fallthrough;
@@ -165,12 +167,15 @@ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
trace_xdp_exception(skb->dev, rcpu->prog, act);
fallthrough;
case XDP_DROP:
- skb_list_del_init(skb);
- kfree_skb(skb);
+ napi_consume_skb(skb, true);
stats->drop++;
- return;
+ break;
}
}
+
+ stats->pass += pass;
+
+ return pass;
}
static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
@@ -204,7 +209,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
stats->drop++;
} else {
frames[nframes++] = xdpf;
- stats->pass++;
}
break;
case XDP_REDIRECT:
@@ -228,43 +232,65 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
}
xdp_clear_return_frame_no_direct();
+ stats->pass += nframes;
return nframes;
}
#define CPUMAP_BATCH 8
-static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
- int xdp_n, struct xdp_cpumap_stats *stats,
- struct list_head *list)
+struct cpu_map_ret {
+ u32 xdp_n;
+ u32 skb_n;
+};
+
+static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+ void **skbs, struct cpu_map_ret *ret,
+ struct xdp_cpumap_stats *stats)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- int nframes;
if (!rcpu->prog)
- return xdp_n;
+ goto out;
- rcu_read_lock_bh();
+ rcu_read_lock();
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
- nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+ ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats);
+ if (unlikely(ret->skb_n))
+ ret->skb_n = cpu_map_bpf_prog_run_skb(rcpu, skbs, ret->skb_n,
+ stats);
if (stats->redirect)
xdp_do_flush();
- if (unlikely(!list_empty(list)))
- cpu_map_bpf_prog_run_skb(rcpu, list, stats);
-
bpf_net_ctx_clear(bpf_net_ctx);
- rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+ rcu_read_unlock();
- return nframes;
+out:
+ if (unlikely(ret->skb_n) && ret->xdp_n)
+ memmove(&skbs[ret->xdp_n], skbs, ret->skb_n * sizeof(*skbs));
+}
+
+static void cpu_map_gro_flush(struct bpf_cpu_map_entry *rcpu, bool empty)
+{
+ /*
+ * If the ring is not empty, there'll be a new iteration soon, and we
+ * only need to do a full flush if a tick is long (> 1 ms).
+ * If the ring is empty, to not hold GRO packets in the stack for too
+ * long, do a full flush.
+ * This is equivalent to how NAPI decides whether to perform a full
+ * flush.
+ */
+ gro_flush(&rcpu->gro, !empty && HZ >= 1000);
+ gro_normal_list(&rcpu->gro);
}
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
unsigned long last_qs = jiffies;
+ u32 packets = 0;
complete(&rcpu->kthread_running);
set_current_state(TASK_INTERRUPTIBLE);
@@ -277,11 +303,11 @@ static int cpu_map_kthread_run(void *data)
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
struct xdp_cpumap_stats stats = {}; /* zero stats */
unsigned int kmem_alloc_drops = 0, sched = 0;
- gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
- int i, n, m, nframes, xdp_n;
+ struct cpu_map_ret ret = { };
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
- LIST_HEAD(list);
+ u32 i, n, m;
+ bool empty;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -306,7 +332,7 @@ static int cpu_map_kthread_run(void *data)
*/
n = __ptr_ring_consume_batched(rcpu->queue, frames,
CPUMAP_BATCH);
- for (i = 0, xdp_n = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page;
@@ -314,11 +340,11 @@ static int cpu_map_kthread_run(void *data)
struct sk_buff *skb = f;
__ptr_clear_bit(0, &skb);
- list_add_tail(&skb->list, &list);
+ skbs[ret.skb_n++] = skb;
continue;
}
- frames[xdp_n++] = f;
+ frames[ret.xdp_n++] = f;
page = virt_to_page(f);
/* Bring struct page memory area to curr CPU. Read by
@@ -328,40 +354,51 @@ static int cpu_map_kthread_run(void *data)
prefetchw(page);
}
+ local_bh_disable();
+
/* Support running another XDP prog on this CPU */
- nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
- if (nframes) {
- m = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- gfp, nframes, skbs);
- if (unlikely(m == 0)) {
- for (i = 0; i < nframes; i++)
- skbs[i] = NULL; /* effect: xdp_return_frame */
- kmem_alloc_drops += nframes;
- }
+ cpu_map_bpf_prog_run(rcpu, frames, skbs, &ret, &stats);
+ if (!ret.xdp_n)
+ goto stats;
+
+ m = napi_skb_cache_get_bulk(skbs, ret.xdp_n);
+ if (unlikely(m < ret.xdp_n)) {
+ for (i = m; i < ret.xdp_n; i++)
+ xdp_return_frame(frames[i]);
+
+ if (ret.skb_n)
+ memmove(&skbs[m], &skbs[ret.xdp_n],
+ ret.skb_n * sizeof(*skbs));
+
+ kmem_alloc_drops += ret.xdp_n - m;
+ ret.xdp_n = m;
}
- local_bh_disable();
- for (i = 0; i < nframes; i++) {
+ for (i = 0; i < ret.xdp_n; i++) {
struct xdp_frame *xdpf = frames[i];
- struct sk_buff *skb = skbs[i];
- skb = __xdp_build_skb_from_frame(xdpf, skb,
- xdpf->dev_rx);
- if (!skb) {
- xdp_return_frame(xdpf);
- continue;
- }
-
- list_add_tail(&skb->list, &list);
+ /* Can fail only when !skb -- already handled above */
+ __xdp_build_skb_from_frame(xdpf, skbs[i], xdpf->dev_rx);
}
+stats:
/* Feedback loop via tracepoint.
* NB: keep before recv to allow measuring enqueue/dequeue latency.
*/
trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
sched, &stats);
- netif_receive_skb_list(&list);
+ for (i = 0; i < ret.xdp_n + ret.skb_n; i++)
+ gro_receive_skb(&rcpu->gro, skbs[i]);
+
+ /* Flush either every 64 packets or in case of empty ring */
+ packets += n;
+ empty = __ptr_ring_empty(rcpu->queue);
+ if (packets >= NAPI_POLL_WEIGHT || empty) {
+ cpu_map_gro_flush(rcpu, empty);
+ packets = 0;
+ }
+
local_bh_enable(); /* resched point, may call do_softirq() */
}
__set_current_state(TASK_RUNNING);
@@ -430,6 +467,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
rcpu->cpu = cpu;
rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
+ gro_init(&rcpu->gro);
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
goto free_ptr_ring;
@@ -458,6 +496,7 @@ free_prog:
if (rcpu->prog)
bpf_prog_put(rcpu->prog);
free_ptr_ring:
+ gro_cleanup(&rcpu->gro);
ptr_ring_cleanup(rcpu->queue, NULL);
free_queue:
kfree(rcpu->queue);
@@ -487,6 +526,7 @@ static void __cpu_map_entry_free(struct work_struct *work)
if (rcpu->prog)
bpf_prog_put(rcpu->prog);
+ gro_cleanup(&rcpu->gro);
/* The queue should be empty at this point */
__cpu_map_ring_cleanup(rcpu->queue);
ptr_ring_cleanup(rcpu->queue, NULL);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1a4fec330eaa..42ae8d595c2c 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -25,6 +25,7 @@
#include <linux/rhashtable.h>
#include <linux/rtnetlink.h>
#include <linux/rwsem.h>
+#include <net/netdev_lock.h>
#include <net/xdp.h>
/* Protects offdevs, members of bpf_offload_netdev and offload members
@@ -528,13 +529,14 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&offmap->map, attr);
-
rtnl_lock();
- down_write(&bpf_devs_lock);
offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
err = bpf_dev_offload_check(offmap->netdev);
if (err)
- goto err_unlock;
+ goto err_unlock_rtnl;
+
+ netdev_lock_ops(offmap->netdev);
+ down_write(&bpf_devs_lock);
ondev = bpf_offload_find_netdev(offmap->netdev);
if (!ondev) {
@@ -548,12 +550,15 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
list_add_tail(&offmap->offloads, &ondev->maps);
up_write(&bpf_devs_lock);
+ netdev_unlock_ops(offmap->netdev);
rtnl_unlock();
return &offmap->map;
err_unlock:
up_write(&bpf_devs_lock);
+ netdev_unlock_ops(offmap->netdev);
+err_unlock_rtnl:
rtnl_unlock();
bpf_map_area_free(offmap);
return ERR_PTR(err);
diff --git a/kernel/capability.c b/kernel/capability.c
index e089d2628c29..829f49ae07b9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -286,22 +286,6 @@ bool has_ns_capability(struct task_struct *t,
}
/**
- * has_capability - Does a task have a capability in init_user_ns
- * @t: The task in question
- * @cap: The capability to be tested for
- *
- * Return true if the specified task has the given superior capability
- * currently in effect to the initial user namespace, false if not.
- *
- * Note that this does not set PF_SUPERPRIV on the task.
- */
-bool has_capability(struct task_struct *t, int cap)
-{
- return has_ns_capability(t, &init_user_ns, cap);
-}
-EXPORT_SYMBOL(has_capability);
-
-/**
* has_ns_capability_noaudit - Does a task have a capability (unaudited)
* in a specific user ns.
* @t: The task in question
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ad755db29efd..b08bb34b1718 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -526,6 +526,7 @@ void lockdep_assert_cpus_held(void)
percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
+EXPORT_SYMBOL_GPL(lockdep_assert_cpus_held);
#ifdef CONFIG_LOCKDEP
int lockdep_is_cpus_held(void)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5d2221ec6d7c..0bb21659e252 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4886,7 +4886,7 @@ find_get_context(struct task_struct *task, struct perf_event *event)
if (!task) {
/* Must be root to operate on a CPU event: */
- err = perf_allow_cpu(&event->attr);
+ err = perf_allow_cpu();
if (err)
return ERR_PTR(err);
@@ -12848,7 +12848,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
}
/* privileged levels capture (kernel, hv): check permissions */
if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
- ret = perf_allow_kernel(attr);
+ ret = perf_allow_kernel();
if (ret)
return ret;
}
@@ -13105,12 +13105,12 @@ SYSCALL_DEFINE5(perf_event_open,
return err;
/* Do we allow access to perf_event_open(2) ? */
- err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+ err = security_perf_event_open(PERF_SECURITY_OPEN);
if (err)
return err;
if (!attr.exclude_kernel) {
- err = perf_allow_kernel(&attr);
+ err = perf_allow_kernel();
if (err)
return err;
}
@@ -13130,7 +13130,7 @@ SYSCALL_DEFINE5(perf_event_open,
/* Only privileged users can get physical addresses */
if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
- err = perf_allow_kernel(&attr);
+ err = perf_allow_kernel();
if (err)
return err;
}
@@ -13969,12 +13969,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
return &event->attr;
}
-int perf_allow_kernel(struct perf_event_attr *attr)
+int perf_allow_kernel(void)
{
if (sysctl_perf_event_paranoid > 1 && !perfmon_capable())
return -EACCES;
- return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
+ return security_perf_event_open(PERF_SECURITY_KERNEL);
}
EXPORT_SYMBOL_GPL(perf_allow_kernel);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 875f25ed6f71..3f02a0e45254 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -47,10 +47,6 @@ config GENERIC_IRQ_INJECTION
config HARDIRQS_SW_RESEND
bool
-# Edge style eoi based handler (cell)
-config IRQ_EDGE_EOI_HANDLER
- bool
-
# Generic configurable interrupt chip implementation
config GENERIC_IRQ_CHIP
bool
@@ -96,6 +92,7 @@ config GENERIC_MSI_IRQ
bool
select IRQ_DOMAIN_HIERARCHY
+# irqchip drivers should select this if they call iommu_dma_prepare_msi()
config IRQ_MSI_IOMMU
bool
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0ff987d3a799..36cf1b09cc84 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -838,53 +838,6 @@ out_unlock:
}
EXPORT_SYMBOL(handle_edge_irq);
-#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
-/**
- * handle_edge_eoi_irq - edge eoi type IRQ handler
- * @desc: the interrupt description structure for this irq
- *
- * Similar as the above handle_edge_irq, but using eoi and w/o the
- * mask/unmask logic.
- */
-void handle_edge_eoi_irq(struct irq_desc *desc)
-{
- struct irq_chip *chip = irq_desc_get_chip(desc);
-
- raw_spin_lock(&desc->lock);
-
- desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-
- if (!irq_may_run(desc)) {
- desc->istate |= IRQS_PENDING;
- goto out_eoi;
- }
-
- /*
- * If its disabled or no action available then mask it and get
- * out of here.
- */
- if (irqd_irq_disabled(&desc->irq_data) || !desc->action) {
- desc->istate |= IRQS_PENDING;
- goto out_eoi;
- }
-
- kstat_incr_irqs_this_cpu(desc);
-
- do {
- if (unlikely(!desc->action))
- goto out_eoi;
-
- handle_irq_event(desc);
-
- } while ((desc->istate & IRQS_PENDING) &&
- !irqd_irq_disabled(&desc->irq_data));
-
-out_eoi:
- chip->irq_eoi(&desc->irq_data);
- raw_spin_unlock(&desc->lock);
-}
-#endif
-
/**
* handle_percpu_irq - Per CPU local irq handler
* @desc: the interrupt description structure for this irq
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ad4a85e31160..5c8d43cdb0a3 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -270,11 +270,16 @@ fail:
return ret;
}
+void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
+ *msg = entry->msg;
+}
+
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
struct msi_desc *entry = irq_get_msi_desc(irq);
- *msg = entry->msg;
+ __get_cached_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
@@ -338,30 +343,26 @@ int msi_setup_device_data(struct device *dev)
}
/**
- * __msi_lock_descs - Lock the MSI descriptor storage of a device
+ * msi_lock_descs - Lock the MSI descriptor storage of a device
* @dev: Device to operate on
- *
- * Internal function for guard(msi_descs_lock). Don't use in code.
*/
-void __msi_lock_descs(struct device *dev)
+void msi_lock_descs(struct device *dev)
{
mutex_lock(&dev->msi.data->mutex);
}
-EXPORT_SYMBOL_GPL(__msi_lock_descs);
+EXPORT_SYMBOL_GPL(msi_lock_descs);
/**
- * __msi_unlock_descs - Unlock the MSI descriptor storage of a device
+ * msi_unlock_descs - Unlock the MSI descriptor storage of a device
* @dev: Device to operate on
- *
- * Internal function for guard(msi_descs_lock). Don't use in code.
*/
-void __msi_unlock_descs(struct device *dev)
+void msi_unlock_descs(struct device *dev)
{
/* Invalidate the index which was cached by the iterator */
dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
mutex_unlock(&dev->msi.data->mutex);
}
-EXPORT_SYMBOL_GPL(__msi_unlock_descs);
+EXPORT_SYMBOL_GPL(msi_unlock_descs);
static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
enum msi_desc_filter filter)
@@ -447,6 +448,7 @@ EXPORT_SYMBOL_GPL(msi_next_desc);
unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
{
struct msi_desc *desc;
+ unsigned int ret = 0;
bool pcimsi = false;
struct xarray *xa;
@@ -460,7 +462,7 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
pcimsi = to_pci_dev(dev)->msi_enabled;
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
xa = &dev->msi.data->__domains[domid].store;
desc = xa_load(xa, pcimsi ? 0 : index);
if (desc && desc->irq) {
@@ -469,12 +471,16 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne
* PCI-MSIX and platform MSI use a descriptor per
* interrupt.
*/
- if (!pcimsi)
- return desc->irq;
- if (index < desc->nvec_used)
- return desc->irq + index;
+ if (pcimsi) {
+ if (index < desc->nvec_used)
+ ret = desc->irq + index;
+ } else {
+ ret = desc->irq;
+ }
}
- return 0;
+
+ msi_unlock_descs(dev);
+ return ret;
}
EXPORT_SYMBOL_GPL(msi_domain_get_virq);
@@ -992,8 +998,9 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
void *chip_data)
{
struct irq_domain *domain, *parent = dev->msi.domain;
+ struct fwnode_handle *fwnode, *fwnalloced = NULL;
+ struct msi_domain_template *bundle;
const struct msi_parent_ops *pops;
- struct fwnode_handle *fwnode;
if (!irq_domain_is_msi_parent(parent))
return false;
@@ -1001,8 +1008,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
return false;
- struct msi_domain_template *bundle __free(kfree) =
- bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
+ bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
if (!bundle)
return false;
@@ -1025,36 +1031,41 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
* node as they are not guaranteed to have a fwnode. They are never
* looked up and always handled in the context of the device.
*/
- struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL;
-
- if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE))
- fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name);
- else
+ if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
fwnode = dev->fwnode;
+ else
+ fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name);
if (!fwnode)
- return false;
+ goto free_bundle;
if (msi_setup_device_data(dev))
- return false;
+ goto free_fwnode;
+
+ msi_lock_descs(dev);
- guard(msi_descs_lock)(dev);
if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
- return false;
+ goto fail;
if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
- return false;
+ goto fail;
domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
if (!domain)
- return false;
+ goto fail;
- /* @bundle and @fwnode_alloced are now in use. Prevent cleanup */
- retain_ptr(bundle);
- retain_ptr(fwnode_alloced);
domain->dev = dev;
dev->msi.data->__domains[domid].domain = domain;
+ msi_unlock_descs(dev);
return true;
+
+fail:
+ msi_unlock_descs(dev);
+free_fwnode:
+ irq_domain_free_fwnode(fwnalloced);
+free_bundle:
+ kfree(bundle);
+ return false;
}
/**
@@ -1068,10 +1079,12 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
struct msi_domain_info *info;
struct irq_domain *domain;
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
+
domain = msi_get_device_domain(dev, domid);
+
if (!domain || !irq_domain_is_msi_device(domain))
- return;
+ goto unlock;
dev->msi.data->__domains[domid].domain = NULL;
info = domain->host_data;
@@ -1080,6 +1093,9 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
irq_domain_remove(domain);
irq_domain_free_fwnode(fwnode);
kfree(container_of(info, struct msi_domain_template, info));
+
+unlock:
+ msi_unlock_descs(dev);
}
/**
@@ -1095,14 +1111,16 @@ bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
{
struct msi_domain_info *info;
struct irq_domain *domain;
+ bool ret = false;
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
domain = msi_get_device_domain(dev, domid);
if (domain && irq_domain_is_msi_device(domain)) {
info = domain->host_data;
- return info->bus_token == bus_token;
+ ret = info->bus_token == bus_token;
}
- return false;
+ msi_unlock_descs(dev);
+ return ret;
}
static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -1144,7 +1162,7 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
return false;
- if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
+ if (info->flags & MSI_FLAG_NO_MASK)
return false;
/*
@@ -1334,17 +1352,21 @@ static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
}
/**
- * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
+ * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
* @dev: Pointer to device struct of the device for which the interrupts
* are allocated
* @domid: Id of the interrupt domain to operate on
* @first: First index to allocate (inclusive)
* @last: Last index to allocate (inclusive)
*
+ * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
+ * pair. Use this for MSI irqdomains which implement their own descriptor
+ * allocation/free.
+ *
* Return: %0 on success or an error code.
*/
-int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
- unsigned int first, unsigned int last)
+int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
{
struct msi_ctrl ctrl = {
.domid = domid,
@@ -1353,9 +1375,29 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
.nirqs = last + 1 - first,
};
- guard(msi_descs_lock)(dev);
return msi_domain_alloc_locked(dev, &ctrl);
}
+
+/**
+ * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are allocated
+ * @domid: Id of the interrupt domain to operate on
+ * @first: First index to allocate (inclusive)
+ * @last: Last index to allocate (inclusive)
+ *
+ * Return: %0 on success or an error code.
+ */
+int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
+{
+ int ret;
+
+ msi_lock_descs(dev);
+ ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
+ msi_unlock_descs(dev);
+ return ret;
+}
EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
/**
@@ -1458,8 +1500,12 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
const struct irq_affinity_desc *affdesc,
union msi_instance_cookie *icookie)
{
- guard(msi_descs_lock)(dev);
- return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
+ struct msi_map map;
+
+ msi_lock_descs(dev);
+ map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
+ msi_unlock_descs(dev);
+ return map;
}
/**
@@ -1496,11 +1542,13 @@ int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
icookie.value = ((u64)type << 32) | hwirq;
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
map.index = -EINVAL;
else
map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
+ msi_unlock_descs(dev);
+
return map.index >= 0 ? map.virq : map.index;
}
@@ -1570,8 +1618,8 @@ static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
* @first: First index to free (inclusive)
* @last: Last index to free (inclusive)
*/
-static void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
- unsigned int first, unsigned int last)
+void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
{
struct msi_ctrl ctrl = {
.domid = domid,
@@ -1593,8 +1641,9 @@ static void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int d
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
unsigned int first, unsigned int last)
{
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
msi_domain_free_irqs_range_locked(dev, domid, first, last);
+ msi_unlock_descs(dev);
}
EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
@@ -1624,8 +1673,9 @@ void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
*/
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
- guard(msi_descs_lock)(dev);
+ msi_lock_descs(dev);
msi_domain_free_irqs_all_locked(dev, domid);
+ msi_unlock_descs(dev);
}
/**
@@ -1644,11 +1694,12 @@ void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
return;
- guard(msi_descs_lock)(dev);
- if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain))
- return;
- msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
- desc->msi_index);
+ msi_lock_descs(dev);
+ if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
+ msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
+ desc->msi_index);
+ }
+ msi_unlock_descs(dev);
}
/**
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c0bdc1686154..c22ad51c4317 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1013,7 +1013,7 @@ int kernel_kexec(void)
error = -EBUSY;
goto Restore_console;
}
- suspend_console();
+ console_suspend_all();
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Resume_console;
@@ -1072,7 +1072,7 @@ int kernel_kexec(void)
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
Resume_console:
- resume_console();
+ console_resume_all();
thaw_processes();
Restore_console:
pm_restore_console();
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 0cd39954d5a1..4a0fb7978d0d 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -601,9 +601,12 @@ static int klp_add_object_nops(struct klp_patch *patch,
}
/*
- * Add 'nop' functions which simply return to the caller to run
- * the original function. The 'nop' functions are added to a
- * patch to facilitate a 'replace' mode.
+ * Add 'nop' functions which simply return to the caller to run the
+ * original function.
+ *
+ * They are added only when the atomic replace mode is used and only for
+ * functions which are currently livepatched but are no longer included
+ * in the new livepatch.
*/
static int klp_add_nops(struct klp_patch *patch)
{
diff --git a/kernel/panic.c b/kernel/panic.c
index d8635d5cecb2..0c55eec9e874 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -511,6 +511,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
TAINT_FLAG(AUX, 'X', ' ', true),
TAINT_FLAG(RANDSTRUCT, 'T', ' ', true),
TAINT_FLAG(TEST, 'N', ' ', true),
+ TAINT_FLAG(FWCTL, 'J', ' ', true),
};
#undef TAINT_FLAG
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ca947ed32e3d..54a623680019 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -380,8 +380,7 @@ config CPU_PM
config ENERGY_MODEL
bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)"
- depends on SMP
- depends on CPU_FREQ
+ depends on CPU_FREQ || PM_DEVFREQ
help
Several subsystems (thermal and/or the task scheduler for example)
can leverage information about the energy consumed by devices to
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 3874f0e97651..d9b7e2b38c7a 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -161,22 +161,10 @@ static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
#endif
-static void em_destroy_table_rcu(struct rcu_head *rp)
-{
- struct em_perf_table __rcu *table;
-
- table = container_of(rp, struct em_perf_table, rcu);
- kfree(table);
-}
-
static void em_release_table_kref(struct kref *kref)
{
- struct em_perf_table __rcu *table;
-
/* It was the last owner of this table so we can free */
- table = container_of(kref, struct em_perf_table, kref);
-
- call_rcu(&table->rcu, em_destroy_table_rcu);
+ kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
}
/**
@@ -185,7 +173,7 @@ static void em_release_table_kref(struct kref *kref)
*
* No return values.
*/
-void em_table_free(struct em_perf_table __rcu *table)
+void em_table_free(struct em_perf_table *table)
{
kref_put(&table->kref, em_release_table_kref);
}
@@ -198,9 +186,9 @@ void em_table_free(struct em_perf_table __rcu *table)
* has a user.
* Returns allocated table or NULL.
*/
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *table;
+ struct em_perf_table *table;
int table_size;
table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
@@ -239,7 +227,7 @@ static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
}
static int em_compute_costs(struct device *dev, struct em_perf_state *table,
- struct em_data_callback *cb, int nr_states,
+ const struct em_data_callback *cb, int nr_states,
unsigned long flags)
{
unsigned long prev_cost = ULONG_MAX;
@@ -308,9 +296,9 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
* Return 0 on success or an error code on failure.
*/
int em_dev_update_perf_domain(struct device *dev,
- struct em_perf_table __rcu *new_table)
+ struct em_perf_table *new_table)
{
- struct em_perf_table __rcu *old_table;
+ struct em_perf_table *old_table;
struct em_perf_domain *pd;
if (!dev)
@@ -327,7 +315,8 @@ int em_dev_update_perf_domain(struct device *dev,
kref_get(&new_table->kref);
- old_table = pd->em_table;
+ old_table = rcu_dereference_protected(pd->em_table,
+ lockdep_is_held(&em_pd_mutex));
rcu_assign_pointer(pd->em_table, new_table);
em_cpufreq_update_efficiencies(dev, new_table->state);
@@ -341,7 +330,7 @@ EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
struct em_perf_state *table,
- struct em_data_callback *cb,
+ const struct em_data_callback *cb,
unsigned long flags)
{
unsigned long power, freq, prev_freq = 0;
@@ -396,10 +385,11 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
}
static int em_create_pd(struct device *dev, int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus,
unsigned long flags)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_domain *pd;
struct device *cpu_dev;
int cpu, ret, num_cpus;
@@ -556,9 +546,10 @@ EXPORT_SYMBOL_GPL(em_cpu_get);
* Return 0 on success
*/
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus,
- bool microwatts)
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
{
+ struct em_perf_table *em_table;
unsigned long cap, prev_cap = 0;
unsigned long flags = 0;
int cpu, ret;
@@ -631,7 +622,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
dev->em_pd->min_perf_state = 0;
dev->em_pd->max_perf_state = nr_states - 1;
- em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
+ em_table = rcu_dereference_protected(dev->em_pd->em_table,
+ lockdep_is_held(&em_pd_mutex));
+ em_cpufreq_update_efficiencies(dev, em_table->state);
em_debug_create_pd(dev);
dev_info(dev, "EM: created perf domain\n");
@@ -668,7 +661,8 @@ void em_dev_unregister_perf_domain(struct device *dev)
mutex_lock(&em_pd_mutex);
em_debug_remove_pd(dev);
- em_table_free(dev->em_pd->em_table);
+ em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
+ lockdep_is_held(&em_pd_mutex)));
kfree(dev->em_pd);
dev->em_pd = NULL;
@@ -676,9 +670,9 @@ void em_dev_unregister_perf_domain(struct device *dev)
}
EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
-static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
+static struct em_perf_table *em_table_dup(struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_state *ps, *new_ps;
int ps_size;
@@ -700,7 +694,7 @@ static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
}
static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
- struct em_perf_table __rcu *em_table)
+ struct em_perf_table *em_table)
{
int ret;
@@ -728,10 +722,9 @@ free_em_table:
* are correctly calculated.
*/
static void em_adjust_new_capacity(struct device *dev,
- struct em_perf_domain *pd,
- u64 max_cap)
+ struct em_perf_domain *pd)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
em_table = em_table_dup(pd);
if (!em_table) {
@@ -775,7 +768,8 @@ static void em_check_capacity_update(void)
}
cpufreq_cpu_put(policy);
- pd = em_cpu_get(cpu);
+ dev = get_cpu_device(cpu);
+ pd = em_pd_get(dev);
if (!pd || em_is_artificial(pd))
continue;
@@ -799,8 +793,7 @@ static void em_check_capacity_update(void)
pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
cpu, cpu_capacity, em_max_perf);
- dev = get_cpu_device(cpu);
- em_adjust_new_capacity(dev, pd, cpu_capacity);
+ em_adjust_new_capacity(dev, pd);
}
free_cpumask_var(cpu_done_mask);
@@ -822,7 +815,7 @@ static void em_update_workfn(struct work_struct *work)
*/
int em_dev_update_chip_binning(struct device *dev)
{
- struct em_perf_table __rcu *em_table;
+ struct em_perf_table *em_table;
struct em_perf_domain *pd;
int i, ret;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 10a01af63a80..23c0f4e6cb2f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) "PM: hibernation: " fmt
+#include <crypto/acompress.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/suspend.h>
@@ -411,7 +412,7 @@ int hibernation_snapshot(int platform_mode)
goto Thaw;
}
- suspend_console();
+ console_suspend_all();
pm_restrict_gfp_mask();
error = dpm_suspend(PMSG_FREEZE);
@@ -437,7 +438,7 @@ int hibernation_snapshot(int platform_mode)
if (error || !in_suspend)
pm_restore_gfp_mask();
- resume_console();
+ console_resume_all();
dpm_complete(msg);
Close:
@@ -547,7 +548,7 @@ int hibernation_restore(int platform_mode)
int error;
pm_prepare_console();
- suspend_console();
+ console_suspend_all();
pm_restrict_gfp_mask();
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
@@ -561,7 +562,7 @@ int hibernation_restore(int platform_mode)
}
dpm_resume_end(PMSG_RECOVER);
pm_restore_gfp_mask();
- resume_console();
+ console_resume_all();
pm_restore_console();
return error;
}
@@ -586,7 +587,7 @@ int hibernation_platform_enter(void)
goto Close;
entering_platform_hibernation = true;
- suspend_console();
+ console_suspend_all();
error = dpm_suspend_start(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
@@ -639,7 +640,7 @@ int hibernation_platform_enter(void)
Resume_devices:
entering_platform_hibernation = false;
dpm_resume_end(PMSG_RESTORE);
- resume_console();
+ console_resume_all();
Close:
hibernation_ops->end();
@@ -757,7 +758,7 @@ int hibernate(void)
*/
if (!nocompress) {
strscpy(hib_comp_algo, hibernate_compressor, sizeof(hib_comp_algo));
- if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) {
+ if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) {
pr_err("%s compression is not available\n", hib_comp_algo);
return -EOPNOTSUPP;
}
@@ -901,7 +902,7 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
if (error)
goto dpm_complete;
- suspend_console();
+ console_suspend_all();
error = dpm_suspend(PMSG_FREEZE);
if (error)
@@ -925,7 +926,7 @@ skip:
dpm_resume:
dpm_resume(PMSG_THAW);
- resume_console();
+ console_resume_all();
dpm_complete:
dpm_complete(PMSG_THAW);
@@ -1008,7 +1009,7 @@ static int software_resume(void)
strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo));
else
strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO, sizeof(hib_comp_algo));
- if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) {
+ if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) {
pr_err("%s compression is not available\n", hib_comp_algo);
error = -EOPNOTSUPP;
goto Unlock;
@@ -1446,10 +1447,10 @@ static const char * const comp_alg_enabled[] = {
static int hibernate_compressor_param_set(const char *compressor,
const struct kernel_param *kp)
{
- unsigned int sleep_flags;
int index, ret;
- sleep_flags = lock_system_sleep();
+ if (!mutex_trylock(&system_transition_mutex))
+ return -EBUSY;
index = sysfs_match_string(comp_alg_enabled, compressor);
if (index >= 0) {
@@ -1461,7 +1462,7 @@ static int hibernate_compressor_param_set(const char *compressor,
ret = index;
}
- unlock_system_sleep(sleep_flags);
+ mutex_unlock(&system_transition_mutex);
if (ret)
pr_debug("Cannot set specified compressor %s\n",
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c9fb559a6399..4e6e24e8b854 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -2270,9 +2270,9 @@ int snapshot_read_next(struct snapshot_handle *handle)
*/
void *kaddr;
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
copy_page(buffer, kaddr);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
handle->buffer = buffer;
} else {
handle->buffer = page_address(page);
@@ -2561,9 +2561,9 @@ static void copy_last_highmem_page(void)
if (last_highmem_page) {
void *dst;
- dst = kmap_atomic(last_highmem_page);
+ dst = kmap_local_page(last_highmem_page);
copy_page(dst, buffer);
- kunmap_atomic(dst);
+ kunmap_local(dst);
last_highmem_page = NULL;
}
}
@@ -2881,13 +2881,13 @@ static inline void swap_two_pages_data(struct page *p1, struct page *p2,
{
void *kaddr1, *kaddr2;
- kaddr1 = kmap_atomic(p1);
- kaddr2 = kmap_atomic(p2);
+ kaddr1 = kmap_local_page(p1);
+ kaddr2 = kmap_local_page(p2);
copy_page(buf, kaddr1);
copy_page(kaddr1, kaddr2);
copy_page(kaddr2, buf);
- kunmap_atomic(kaddr2);
- kunmap_atomic(kaddr1);
+ kunmap_local(kaddr2);
+ kunmap_local(kaddr1);
}
/**
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 09f8397bae15..8eaec4ab121d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -91,6 +91,16 @@ static void s2idle_enter(void)
{
trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true);
+ /*
+ * The correctness of the code below depends on the number of online
+ * CPUs being stable, but CPUs cannot be taken offline or put online
+ * while it is running.
+ *
+ * The s2idle_lock must be acquired before the pending wakeup check to
+ * prevent pm_system_wakeup() from running as a whole between that check
+ * and the subsequent s2idle_state update in which case a wakeup event
+ * would get lost.
+ */
raw_spin_lock_irq(&s2idle_lock);
if (pm_wakeup_pending())
goto out;
@@ -98,8 +108,6 @@ static void s2idle_enter(void)
s2idle_state = S2IDLE_STATE_ENTER;
raw_spin_unlock_irq(&s2idle_lock);
- cpus_read_lock();
-
/* Push all the CPUs into the idle loop. */
wake_up_all_idle_cpus();
/* Make the current CPU wait so it can enter the idle loop too. */
@@ -112,8 +120,6 @@ static void s2idle_enter(void)
*/
wake_up_all_idle_cpus();
- cpus_read_unlock();
-
raw_spin_lock_irq(&s2idle_lock);
out:
@@ -502,7 +508,7 @@ int suspend_devices_and_enter(suspend_state_t state)
if (error)
goto Close;
- suspend_console();
+ console_suspend_all();
suspend_test_start();
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
@@ -521,9 +527,9 @@ int suspend_devices_and_enter(suspend_state_t state)
suspend_test_start();
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
- trace_suspend_resume(TPS("resume_console"), state, true);
- resume_console();
- trace_suspend_resume(TPS("resume_console"), state, false);
+ trace_suspend_resume(TPS("console_resume_all"), state, true);
+ console_resume_all();
+ trace_suspend_resume(TPS("console_resume_all"), state, false);
Close:
platform_resume_end(state);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 82b884b67152..80ff5f933a62 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) "PM: " fmt
+#include <crypto/acompress.h>
#include <linux/module.h>
#include <linux/file.h>
#include <linux/delay.h>
@@ -635,7 +636,8 @@ static int crc32_threadfn(void *data)
*/
struct cmp_data {
struct task_struct *thr; /* thread */
- struct crypto_comp *cc; /* crypto compressor stream */
+ struct crypto_acomp *cc; /* crypto compressor */
+ struct acomp_req *cr; /* crypto request */
atomic_t ready; /* ready to start flag */
atomic_t stop; /* ready to stop flag */
int ret; /* return code */
@@ -656,7 +658,6 @@ static atomic_t compressed_size = ATOMIC_INIT(0);
static int compress_threadfn(void *data)
{
struct cmp_data *d = data;
- unsigned int cmp_len = 0;
while (1) {
wait_event(d->go, atomic_read_acquire(&d->ready) ||
@@ -670,11 +671,13 @@ static int compress_threadfn(void *data)
}
atomic_set(&d->ready, 0);
- cmp_len = CMP_SIZE - CMP_HEADER;
- d->ret = crypto_comp_compress(d->cc, d->unc, d->unc_len,
- d->cmp + CMP_HEADER,
- &cmp_len);
- d->cmp_len = cmp_len;
+ acomp_request_set_callback(d->cr, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ acomp_request_set_src_nondma(d->cr, d->unc, d->unc_len);
+ acomp_request_set_dst_nondma(d->cr, d->cmp + CMP_HEADER,
+ CMP_SIZE - CMP_HEADER);
+ d->ret = crypto_acomp_compress(d->cr);
+ d->cmp_len = d->cr->dlen;
atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len);
atomic_set_release(&d->stop, 1);
@@ -745,13 +748,20 @@ static int save_compressed_image(struct swap_map_handle *handle,
init_waitqueue_head(&data[thr].go);
init_waitqueue_head(&data[thr].done);
- data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0);
+ data[thr].cc = crypto_alloc_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR_OR_NULL(data[thr].cc)) {
pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc));
ret = -EFAULT;
goto out_clean;
}
+ data[thr].cr = acomp_request_alloc(data[thr].cc);
+ if (!data[thr].cr) {
+ pr_err("Could not allocate comp request\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
data[thr].thr = kthread_run(compress_threadfn,
&data[thr],
"image_compress/%u", thr);
@@ -899,8 +909,8 @@ out_clean:
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
- if (data[thr].cc)
- crypto_free_comp(data[thr].cc);
+ acomp_request_free(data[thr].cr);
+ crypto_free_acomp(data[thr].cc);
}
vfree(data);
}
@@ -1142,7 +1152,8 @@ static int load_image(struct swap_map_handle *handle,
*/
struct dec_data {
struct task_struct *thr; /* thread */
- struct crypto_comp *cc; /* crypto compressor stream */
+ struct crypto_acomp *cc; /* crypto compressor */
+ struct acomp_req *cr; /* crypto request */
atomic_t ready; /* ready to start flag */
atomic_t stop; /* ready to stop flag */
int ret; /* return code */
@@ -1160,7 +1171,6 @@ struct dec_data {
static int decompress_threadfn(void *data)
{
struct dec_data *d = data;
- unsigned int unc_len = 0;
while (1) {
wait_event(d->go, atomic_read_acquire(&d->ready) ||
@@ -1174,10 +1184,13 @@ static int decompress_threadfn(void *data)
}
atomic_set(&d->ready, 0);
- unc_len = UNC_SIZE;
- d->ret = crypto_comp_decompress(d->cc, d->cmp + CMP_HEADER, d->cmp_len,
- d->unc, &unc_len);
- d->unc_len = unc_len;
+ acomp_request_set_callback(d->cr, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ acomp_request_set_src_nondma(d->cr, d->cmp + CMP_HEADER,
+ d->cmp_len);
+ acomp_request_set_dst_nondma(d->cr, d->unc, UNC_SIZE);
+ d->ret = crypto_acomp_decompress(d->cr);
+ d->unc_len = d->cr->dlen;
if (clean_pages_on_decompress)
flush_icache_range((unsigned long)d->unc,
@@ -1254,13 +1267,20 @@ static int load_compressed_image(struct swap_map_handle *handle,
init_waitqueue_head(&data[thr].go);
init_waitqueue_head(&data[thr].done);
- data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0);
+ data[thr].cc = crypto_alloc_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR_OR_NULL(data[thr].cc)) {
pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc));
ret = -EFAULT;
goto out_clean;
}
+ data[thr].cr = acomp_request_alloc(data[thr].cc);
+ if (!data[thr].cr) {
+ pr_err("Could not allocate comp request\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
data[thr].thr = kthread_run(decompress_threadfn,
&data[thr],
"image_decompress/%u", thr);
@@ -1507,8 +1527,8 @@ out_clean:
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
- if (data[thr].cc)
- crypto_free_comp(data[thr].cc);
+ acomp_request_free(data[thr].cr);
+ crypto_free_acomp(data[thr].cc);
}
vfree(data);
}
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index a91bdf802967..48a24e7b309d 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -64,6 +64,7 @@ struct dev_printk_info;
extern struct printk_ringbuffer *prb;
extern bool printk_kthreads_running;
+extern bool debug_non_panic_cpus;
__printf(4, 0)
int vprintk_store(int facility, int level,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 057db78876cd..1eea80d0648e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2375,6 +2375,22 @@ void printk_legacy_allow_panic_sync(void)
}
}
+bool __read_mostly debug_non_panic_cpus;
+
+#ifdef CONFIG_PRINTK_CALLER
+static int __init debug_non_panic_cpus_setup(char *str)
+{
+ debug_non_panic_cpus = true;
+ pr_info("allow messages from non-panic CPUs in panic()\n");
+
+ return 0;
+}
+early_param("debug_non_panic_cpus", debug_non_panic_cpus_setup);
+module_param(debug_non_panic_cpus, bool, 0644);
+MODULE_PARM_DESC(debug_non_panic_cpus,
+ "allow messages from non-panic CPUs in panic()");
+#endif
+
asmlinkage int vprintk_emit(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
@@ -2391,7 +2407,9 @@ asmlinkage int vprintk_emit(int facility, int level,
* non-panic CPUs are generating any messages, they will be
* silently dropped.
*/
- if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
+ if (other_cpu_in_panic() &&
+ !debug_non_panic_cpus &&
+ !panic_triggering_all_cpu_backtrace)
return 0;
printk_get_console_flush_type(&ft);
@@ -2731,11 +2749,11 @@ module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool
MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");
/**
- * suspend_console - suspend the console subsystem
+ * console_suspend_all - suspend the console subsystem
*
* This disables printk() while we go into suspend states
*/
-void suspend_console(void)
+void console_suspend_all(void)
{
struct console *con;
@@ -2758,7 +2776,7 @@ void suspend_console(void)
synchronize_srcu(&console_srcu);
}
-void resume_console(void)
+void console_resume_all(void)
{
struct console_flush_type ft;
struct console *con;
@@ -3340,7 +3358,12 @@ void console_unblank(void)
*/
cookie = console_srcu_read_lock();
for_each_console_srcu(c) {
- if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
+ short flags = console_srcu_read_flags(c);
+
+ if (flags & CON_SUSPENDED)
+ continue;
+
+ if ((flags & CON_ENABLED) && c->unblank) {
found_unblank = true;
break;
}
@@ -3377,7 +3400,12 @@ void console_unblank(void)
cookie = console_srcu_read_lock();
for_each_console_srcu(c) {
- if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank)
+ short flags = console_srcu_read_flags(c);
+
+ if (flags & CON_SUSPENDED)
+ continue;
+
+ if ((flags & CON_ENABLED) && c->unblank)
c->unblank();
}
console_srcu_read_unlock(cookie);
@@ -3495,10 +3523,10 @@ struct tty_driver *console_device(int *index)
/*
* Prevent further output on the passed console device so that (for example)
- * serial drivers can disable console output before suspending a port, and can
+ * serial drivers can suspend console output before suspending a port, and can
* re-enable output afterwards.
*/
-void console_stop(struct console *console)
+void console_suspend(struct console *console)
{
__pr_flush(console, 1000, true);
console_list_lock();
@@ -3513,9 +3541,9 @@ void console_stop(struct console *console)
*/
synchronize_srcu(&console_srcu);
}
-EXPORT_SYMBOL(console_stop);
+EXPORT_SYMBOL(console_suspend);
-void console_start(struct console *console)
+void console_resume(struct console *console)
{
struct console_flush_type ft;
bool is_nbcon;
@@ -3540,7 +3568,7 @@ void console_start(struct console *console)
__pr_flush(console, 1000, true);
}
-EXPORT_SYMBOL(console_start);
+EXPORT_SYMBOL(console_resume);
#ifdef CONFIG_PRINTK
static int unregister_console_locked(struct console *console);
@@ -4275,6 +4303,11 @@ void __init console_init(void)
initcall_t call;
initcall_entry_t *ce;
+#ifdef CONFIG_NULL_TTY_DEFAULT_CONSOLE
+ if (!console_set_on_cmdline)
+ add_preferred_console("ttynull", 0, NULL);
+#endif
+
/* Setup the default TTY line discipline. */
n_tty_init();
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 88e8f3a61922..d9fb053cff67 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -2133,9 +2133,9 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
* there may be other finalized records beyond that
* need to be printed for a panic situation. If this
* is the panic CPU, skip this
- * non-existent/non-finalized record unless it is
- * at or beyond the head, in which case it is not
- * possible to continue.
+ * non-existent/non-finalized record unless non-panic
+ * CPUs are still running and their debugging is
+ * explicitly enabled.
*
* Note that new messages printed on panic CPU are
* finalized when we are here. The only exception
@@ -2143,10 +2143,13 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
* But it would have the sequence number returned
* by "prb_next_reserve_seq() - 1".
*/
- if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb)))
+ if (this_cpu_in_panic() &&
+ (!debug_non_panic_cpus || legacy_allow_panic_sync) &&
+ ((*seq + 1) < prb_next_reserve_seq(rb))) {
(*seq)++;
- else
+ } else {
return false;
+ }
}
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 87540217fc09..cfaca3040b2f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -488,6 +488,16 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
#endif /* CONFIG_SCHED_CORE */
+/* need a wrapper since we may need to trace from modules */
+EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
+
+/* Call via the helper macro trace_set_current_state. */
+void __trace_set_current_state(int state_value)
+{
+ trace_sched_set_state_tp(current, state_value);
+}
+EXPORT_SYMBOL(__trace_set_current_state);
+
/*
* Serialization rules:
*
@@ -5295,6 +5305,12 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
*/
finish_task_switch(prev);
+ /*
+ * This is a special case: the newly created task has just
+ * switched the context for the first time. It is returning from
+ * schedule for the first time in this path.
+ */
+ trace_sched_exit_tp(true, CALLER_ADDR0);
preempt_enable();
if (current->set_child_tid)
@@ -6634,12 +6650,15 @@ static void __sched notrace __schedule(int sched_mode)
* as a preemption by schedule_debug() and RCU.
*/
bool preempt = sched_mode > SM_NONE;
+ bool is_switch = false;
unsigned long *switch_count;
unsigned long prev_state;
struct rq_flags rf;
struct rq *rq;
int cpu;
+ trace_sched_entry_tp(preempt, CALLER_ADDR0);
+
cpu = smp_processor_id();
rq = cpu_rq(cpu);
prev = rq->curr;
@@ -6705,7 +6724,8 @@ picked:
clear_preempt_need_resched();
rq->last_seen_need_resched_ns = 0;
- if (likely(prev != next)) {
+ is_switch = prev != next;
+ if (likely(is_switch)) {
rq->nr_switches++;
/*
* RCU users of rcu_dereference(rq->curr) may not see
@@ -6750,6 +6770,7 @@ picked:
__balance_callbacks(rq);
raw_spin_rq_unlock_irq(rq);
}
+ trace_sched_exit_tp(is_switch, CALLER_ADDR0);
}
void __noreturn do_task_dead(void)
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index bb7d066a7c39..a297790b7333 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -206,7 +206,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
continue;
}
- arch_static_call_transform(site_addr, NULL, func,
+ arch_static_call_transform(site_addr, tramp, func,
static_call_is_tail(site));
}
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4ebe6136b08d..3b7a7308e35b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -20,9 +20,6 @@
*/
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/bitmap.h>
#include <linux/signal.h>
@@ -31,7 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
-#include <linux/kmemleak.h>
#include <linux/filter.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -42,25 +38,20 @@
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/ratelimit.h>
-#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/limits.h>
-#include <linux/dcache.h>
#include <linux/syscalls.h>
-#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
-#include <linux/oom.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/mount.h>
-#include <linux/userfaultfd_k.h>
#include <linux/pid.h>
#include "../lib/kstrtox.h"
@@ -122,12 +113,6 @@ enum sysctl_writes_mode {
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
#endif /* CONFIG_PROC_SYSCTL */
-
-#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
- defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
-int sysctl_legacy_va_layout;
-#endif
-
#endif /* CONFIG_SYSCTL */
/*
@@ -1787,15 +1772,6 @@ static const struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_MAXOLDUID,
},
-#ifdef CONFIG_S390
- {
- .procname = "userprocess_debug",
- .data = &show_unhandled_signals,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
{
.procname = "panic_on_oops",
.data = &panic_on_oops,
@@ -1843,15 +1819,6 @@ static const struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if defined(CONFIG_S390) && defined(CONFIG_SMP)
- {
- .procname = "spin_retry",
- .data = &spin_retry,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
{
.procname = "ignore-unaligned-usertrap",
@@ -1901,215 +1868,9 @@ static const struct ctl_table kern_table[] = {
#endif
};
-static const struct ctl_table vm_table[] = {
- {
- .procname = "overcommit_memory",
- .data = &sysctl_overcommit_memory,
- .maxlen = sizeof(sysctl_overcommit_memory),
- .mode = 0644,
- .proc_handler = overcommit_policy_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_TWO,
- },
- {
- .procname = "overcommit_ratio",
- .data = &sysctl_overcommit_ratio,
- .maxlen = sizeof(sysctl_overcommit_ratio),
- .mode = 0644,
- .proc_handler = overcommit_ratio_handler,
- },
- {
- .procname = "overcommit_kbytes",
- .data = &sysctl_overcommit_kbytes,
- .maxlen = sizeof(sysctl_overcommit_kbytes),
- .mode = 0644,
- .proc_handler = overcommit_kbytes_handler,
- },
- {
- .procname = "page-cluster",
- .data = &page_cluster,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = (void *)&page_cluster_max,
- },
- {
- .procname = "dirtytime_expire_seconds",
- .data = &dirtytime_expire_interval,
- .maxlen = sizeof(dirtytime_expire_interval),
- .mode = 0644,
- .proc_handler = dirtytime_interval_handler,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "swappiness",
- .data = &vm_swappiness,
- .maxlen = sizeof(vm_swappiness),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_TWO_HUNDRED,
- },
-#ifdef CONFIG_NUMA
- {
- .procname = "numa_stat",
- .data = &sysctl_vm_numa_stat,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = sysctl_vm_numa_stat_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
- {
- .procname = "drop_caches",
- .data = &sysctl_drop_caches,
- .maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = drop_caches_sysctl_handler,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_FOUR,
- },
- {
- .procname = "page_lock_unfairness",
- .data = &sysctl_page_lock_unfairness,
- .maxlen = sizeof(sysctl_page_lock_unfairness),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#ifdef CONFIG_MMU
- {
- .procname = "max_map_count",
- .data = &sysctl_max_map_count,
- .maxlen = sizeof(sysctl_max_map_count),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#else
- {
- .procname = "nr_trim_pages",
- .data = &sysctl_nr_trim_pages,
- .maxlen = sizeof(sysctl_nr_trim_pages),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#endif
- {
- .procname = "vfs_cache_pressure",
- .data = &sysctl_vfs_cache_pressure,
- .maxlen = sizeof(sysctl_vfs_cache_pressure),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
- defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
- {
- .procname = "legacy_va_layout",
- .data = &sysctl_legacy_va_layout,
- .maxlen = sizeof(sysctl_legacy_va_layout),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#endif
-#ifdef CONFIG_NUMA
- {
- .procname = "zone_reclaim_mode",
- .data = &node_reclaim_mode,
- .maxlen = sizeof(node_reclaim_mode),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
-#endif
-#ifdef CONFIG_SMP
- {
- .procname = "stat_interval",
- .data = &sysctl_stat_interval,
- .maxlen = sizeof(sysctl_stat_interval),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "stat_refresh",
- .data = NULL,
- .maxlen = 0,
- .mode = 0600,
- .proc_handler = vmstat_refresh,
- },
-#endif
-#ifdef CONFIG_MMU
- {
- .procname = "mmap_min_addr",
- .data = &dac_mmap_min_addr,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = mmap_min_addr_handler,
- },
-#endif
-#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
- (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
- {
- .procname = "vdso_enabled",
-#ifdef CONFIG_X86_32
- .data = &vdso32_enabled,
- .maxlen = sizeof(vdso32_enabled),
-#else
- .data = &vdso_enabled,
- .maxlen = sizeof(vdso_enabled),
-#endif
- .mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = SYSCTL_ZERO,
- },
-#endif
- {
- .procname = "user_reserve_kbytes",
- .data = &sysctl_user_reserve_kbytes,
- .maxlen = sizeof(sysctl_user_reserve_kbytes),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "admin_reserve_kbytes",
- .data = &sysctl_admin_reserve_kbytes,
- .maxlen = sizeof(sysctl_admin_reserve_kbytes),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- },
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
- {
- .procname = "mmap_rnd_bits",
- .data = &mmap_rnd_bits,
- .maxlen = sizeof(mmap_rnd_bits),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&mmap_rnd_bits_min,
- .extra2 = (void *)&mmap_rnd_bits_max,
- },
-#endif
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
- {
- .procname = "mmap_rnd_compat_bits",
- .data = &mmap_rnd_compat_bits,
- .maxlen = sizeof(mmap_rnd_compat_bits),
- .mode = 0600,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&mmap_rnd_compat_bits_min,
- .extra2 = (void *)&mmap_rnd_compat_bits_max,
- },
-#endif
-};
-
int __init sysctl_init_bases(void)
{
register_sysctl_init("kernel", kern_table);
- register_sysctl_init("vm", vm_table);
return 0;
}
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 7f4e4fb7381e..101a0f7c43e0 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -109,6 +109,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
goto out;
}
pccontext->clk = clk;
+ pccontext->fp = fp;
if (clk->ops.open) {
err = clk->ops.open(pccontext, fp->f_mode);
if (err) {
@@ -229,7 +230,7 @@ static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx)
if (err)
return err;
- if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ if (tx->modes && (cd.fp->f_mode & FMODE_WRITE) == 0) {
err = -EACCES;
goto out;
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d570b8b9c0a9..033fba0633cf 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -263,6 +263,18 @@ config FUNCTION_GRAPH_RETADDR
the function is called. This feature is off by default, and you can
enable it via the trace option funcgraph-retaddr.
+config FUNCTION_TRACE_ARGS
+ bool
+ depends on HAVE_FUNCTION_ARG_ACCESS_API
+ depends on DEBUG_INFO_BTF
+ default y
+ help
+ If supported with function argument access API and BTF, then
+ the function tracer and function graph tracer will support printing
+ of function arguments. This feature is off by default, and can be
+ enabled via the trace option func-args (for the function tracer) and
+ funcgraph-args (for the function graph tracer)
+
config DYNAMIC_FTRACE
bool "enable/disable function tracing dynamically"
depends on FUNCTION_TRACER
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 5dddfc2149f6..8d925cbdce3a 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -865,7 +865,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
}
/*
- * After all architecures have selected HAVE_FUNCTION_GRAPH_FREGS, we can
+ * After all architectures have selected HAVE_FUNCTION_GRAPH_FREGS, we can
* leave only ftrace_return_to_handler(fregs).
*/
#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fc88e0688daf..92015de6203d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1293,6 +1293,8 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
void ftrace_free_filter(struct ftrace_ops *ops)
{
ftrace_ops_init(ops);
+ if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return;
free_ftrace_hash(ops->func_hash->filter_hash);
free_ftrace_hash(ops->func_hash->notrace_hash);
}
@@ -7016,6 +7018,7 @@ static int ftrace_process_locs(struct module *mod,
unsigned long *p;
unsigned long addr;
unsigned long flags = 0; /* Shut up gcc */
+ unsigned long pages;
int ret = -ENOMEM;
count = end - start;
@@ -7023,6 +7026,8 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
+ pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
+
/*
* Sorting mcount in vmlinux at build time depend on
* CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
@@ -7067,7 +7072,9 @@ static int ftrace_process_locs(struct module *mod,
pg = start_pg;
while (p < end) {
unsigned long end_offset;
- addr = ftrace_call_adjust(*p++);
+
+ addr = *p++;
+
/*
* Some architecture linkers will pad between
* the different mcount_loc sections of different
@@ -7079,6 +7086,19 @@ static int ftrace_process_locs(struct module *mod,
continue;
}
+ /*
+ * If this is core kernel, make sure the address is in core
+ * or inittext, as weak functions get zeroed and KASLR can
+ * move them to something other than zero. It just will not
+ * move it to an area where kernel text is.
+ */
+ if (!mod && !(is_kernel_text(addr) || is_kernel_inittext(addr))) {
+ skipped++;
+ continue;
+ }
+
+ addr = ftrace_call_adjust(addr);
+
end_offset = (pg->index+1) * sizeof(pg->records[0]);
if (end_offset > PAGE_SIZE << pg->order) {
/* We should have allocated enough */
@@ -7118,11 +7138,41 @@ static int ftrace_process_locs(struct module *mod,
/* We should have used all pages unless we skipped some */
if (pg_unuse) {
- WARN_ON(!skipped);
+ unsigned long pg_remaining, remaining = 0;
+ unsigned long skip;
+
+ /* Count the number of entries unused and compare it to skipped. */
+ pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index;
+
+ if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) {
+
+ skip = skipped - pg_remaining;
+
+ for (pg = pg_unuse; pg; pg = pg->next)
+ remaining += 1 << pg->order;
+
+ pages -= remaining;
+
+ skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE);
+
+ /*
+ * Check to see if the number of pages remaining would
+ * just fit the number of entries skipped.
+ */
+ WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped",
+ remaining, skipped);
+ }
/* Need to synchronize with ftrace_location_range() */
synchronize_rcu();
ftrace_free_pages(pg_unuse);
}
+
+ if (!mod) {
+ count -= skipped;
+ pr_info("ftrace: allocating %ld entries in %ld pages\n",
+ count, pages);
+ }
+
return ret;
}
@@ -7768,9 +7818,6 @@ void __init ftrace_init(void)
goto failed;
}
- pr_info("ftrace: allocating %ld entries in %ld pages\n",
- count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
-
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
__stop_mcount_loc);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bb6089c2951e..9d4d951090d3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5318,7 +5318,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
* moving it. The page before the header page has the
* flag bit '1' set if it is pointing to the page we want.
* but if the writer is in the process of moving it
- * than it will be '2' or already moved '0'.
+ * then it will be '2' or already moved '0'.
*/
ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 8226352a0062..b39f36013ef2 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -27,6 +27,13 @@ menuconfig RV
source "kernel/trace/rv/monitors/wip/Kconfig"
source "kernel/trace/rv/monitors/wwnr/Kconfig"
+source "kernel/trace/rv/monitors/sched/Kconfig"
+source "kernel/trace/rv/monitors/tss/Kconfig"
+source "kernel/trace/rv/monitors/sco/Kconfig"
+source "kernel/trace/rv/monitors/snroc/Kconfig"
+source "kernel/trace/rv/monitors/scpd/Kconfig"
+source "kernel/trace/rv/monitors/snep/Kconfig"
+source "kernel/trace/rv/monitors/sncid/Kconfig"
# Add new monitors here
config RV_REACTORS
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 188b64668e1f..f9b2cd0483c3 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -5,6 +5,13 @@ ccflags-y += -I $(src) # needed for trace events
obj-$(CONFIG_RV) += rv.o
obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o
obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o
+obj-$(CONFIG_RV_MON_SCHED) += monitors/sched/sched.o
+obj-$(CONFIG_RV_MON_TSS) += monitors/tss/tss.o
+obj-$(CONFIG_RV_MON_SCO) += monitors/sco/sco.o
+obj-$(CONFIG_RV_MON_SNROC) += monitors/snroc/snroc.o
+obj-$(CONFIG_RV_MON_SCPD) += monitors/scpd/scpd.o
+obj-$(CONFIG_RV_MON_SNEP) += monitors/snep/snep.o
+obj-$(CONFIG_RV_MON_SNCID) += monitors/sncid/sncid.o
# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
diff --git a/kernel/trace/rv/monitors/sched/Kconfig b/kernel/trace/rv/monitors/sched/Kconfig
new file mode 100644
index 000000000000..ae3eb410abd7
--- /dev/null
+++ b/kernel/trace/rv/monitors/sched/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SCHED
+ depends on RV
+ bool "sched monitor"
+ help
+ Collection of monitors to check the scheduler behaves according to specifications.
+ Enable this to enable all scheduler specification supported by the current kernel.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/sched/sched.c b/kernel/trace/rv/monitors/sched/sched.c
new file mode 100644
index 000000000000..905e03c3c934
--- /dev/null
+++ b/kernel/trace/rv/monitors/sched/sched.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+
+#define MODULE_NAME "sched"
+
+#include "sched.h"
+
+struct rv_monitor rv_sched;
+
+struct rv_monitor rv_sched = {
+ .name = "sched",
+ .description = "container for several scheduler monitor specifications.",
+ .enable = NULL,
+ .disable = NULL,
+ .reset = NULL,
+ .enabled = 0,
+};
+
+static int __init register_sched(void)
+{
+ rv_register_monitor(&rv_sched, NULL);
+ return 0;
+}
+
+static void __exit unregister_sched(void)
+{
+ rv_unregister_monitor(&rv_sched);
+}
+
+module_init(register_sched);
+module_exit(unregister_sched);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("sched: container for several scheduler monitor specifications.");
diff --git a/kernel/trace/rv/monitors/sched/sched.h b/kernel/trace/rv/monitors/sched/sched.h
new file mode 100644
index 000000000000..ba148dd8d48b
--- /dev/null
+++ b/kernel/trace/rv/monitors/sched/sched.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+extern struct rv_monitor rv_sched;
diff --git a/kernel/trace/rv/monitors/sco/Kconfig b/kernel/trace/rv/monitors/sco/Kconfig
new file mode 100644
index 000000000000..097c96cccdd7
--- /dev/null
+++ b/kernel/trace/rv/monitors/sco/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SCO
+ depends on RV
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "sco monitor"
+ help
+ Monitor to ensure sched_set_state happens only in thread context.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c
new file mode 100644
index 000000000000..4cff59220bfc
--- /dev/null
+++ b/kernel/trace/rv/monitors/sco/sco.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "sco"
+
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "sco.h"
+
+static struct rv_monitor rv_sco;
+DECLARE_DA_MON_PER_CPU(sco, unsigned char);
+
+static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+{
+ da_handle_start_event_sco(sched_set_state_sco);
+}
+
+static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+{
+ da_handle_event_sco(schedule_entry_sco);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+{
+ da_handle_start_event_sco(schedule_exit_sco);
+}
+
+static int enable_sco(void)
+{
+ int retval;
+
+ retval = da_monitor_init_sco();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("sco", sched_set_state_tp, handle_sched_set_state);
+ rv_attach_trace_probe("sco", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("sco", sched_exit_tp, handle_schedule_exit);
+
+ return 0;
+}
+
+static void disable_sco(void)
+{
+ rv_sco.enabled = 0;
+
+ rv_detach_trace_probe("sco", sched_set_state_tp, handle_sched_set_state);
+ rv_detach_trace_probe("sco", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("sco", sched_exit_tp, handle_schedule_exit);
+
+ da_monitor_destroy_sco();
+}
+
+static struct rv_monitor rv_sco = {
+ .name = "sco",
+ .description = "scheduling context operations.",
+ .enable = enable_sco,
+ .disable = disable_sco,
+ .reset = da_monitor_reset_all_sco,
+ .enabled = 0,
+};
+
+static int __init register_sco(void)
+{
+ rv_register_monitor(&rv_sco, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_sco(void)
+{
+ rv_unregister_monitor(&rv_sco);
+}
+
+module_init(register_sco);
+module_exit(unregister_sco);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("sco: scheduling context operations.");
diff --git a/kernel/trace/rv/monitors/sco/sco.h b/kernel/trace/rv/monitors/sco/sco.h
new file mode 100644
index 000000000000..7a4c1f2d5ca1
--- /dev/null
+++ b/kernel/trace/rv/monitors/sco/sco.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of sco automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_sco {
+ thread_context_sco = 0,
+ scheduling_context_sco,
+ state_max_sco
+};
+
+#define INVALID_STATE state_max_sco
+
+enum events_sco {
+ sched_set_state_sco = 0,
+ schedule_entry_sco,
+ schedule_exit_sco,
+ event_max_sco
+};
+
+struct automaton_sco {
+ char *state_names[state_max_sco];
+ char *event_names[event_max_sco];
+ unsigned char function[state_max_sco][event_max_sco];
+ unsigned char initial_state;
+ bool final_states[state_max_sco];
+};
+
+static const struct automaton_sco automaton_sco = {
+ .state_names = {
+ "thread_context",
+ "scheduling_context"
+ },
+ .event_names = {
+ "sched_set_state",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ { thread_context_sco, scheduling_context_sco, INVALID_STATE },
+ { INVALID_STATE, INVALID_STATE, thread_context_sco },
+ },
+ .initial_state = thread_context_sco,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/sco/sco_trace.h b/kernel/trace/rv/monitors/sco/sco_trace.h
new file mode 100644
index 000000000000..b711cd9024ec
--- /dev/null
+++ b/kernel/trace/rv/monitors/sco/sco_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SCO
+DEFINE_EVENT(event_da_monitor, event_sco,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_sco,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_SCO */
diff --git a/kernel/trace/rv/monitors/scpd/Kconfig b/kernel/trace/rv/monitors/scpd/Kconfig
new file mode 100644
index 000000000000..b9114fbf680f
--- /dev/null
+++ b/kernel/trace/rv/monitors/scpd/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SCPD
+ depends on RV
+ depends on PREEMPT_TRACER
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "scpd monitor"
+ help
+ Monitor to ensure schedule is called with preemption disabled.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/scpd/scpd.c b/kernel/trace/rv/monitors/scpd/scpd.c
new file mode 100644
index 000000000000..cbdd6a5f8d7f
--- /dev/null
+++ b/kernel/trace/rv/monitors/scpd/scpd.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "scpd"
+
+#include <trace/events/sched.h>
+#include <trace/events/preemptirq.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "scpd.h"
+
+static struct rv_monitor rv_scpd;
+DECLARE_DA_MON_PER_CPU(scpd, unsigned char);
+
+static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_scpd(preempt_disable_scpd);
+}
+
+static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_start_event_scpd(preempt_enable_scpd);
+}
+
+static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+{
+ da_handle_event_scpd(schedule_entry_scpd);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+{
+ da_handle_event_scpd(schedule_exit_scpd);
+}
+
+static int enable_scpd(void)
+{
+ int retval;
+
+ retval = da_monitor_init_scpd();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("scpd", preempt_disable, handle_preempt_disable);
+ rv_attach_trace_probe("scpd", preempt_enable, handle_preempt_enable);
+ rv_attach_trace_probe("scpd", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("scpd", sched_exit_tp, handle_schedule_exit);
+
+ return 0;
+}
+
+static void disable_scpd(void)
+{
+ rv_scpd.enabled = 0;
+
+ rv_detach_trace_probe("scpd", preempt_disable, handle_preempt_disable);
+ rv_detach_trace_probe("scpd", preempt_enable, handle_preempt_enable);
+ rv_detach_trace_probe("scpd", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("scpd", sched_exit_tp, handle_schedule_exit);
+
+ da_monitor_destroy_scpd();
+}
+
+static struct rv_monitor rv_scpd = {
+ .name = "scpd",
+ .description = "schedule called with preemption disabled.",
+ .enable = enable_scpd,
+ .disable = disable_scpd,
+ .reset = da_monitor_reset_all_scpd,
+ .enabled = 0,
+};
+
+static int __init register_scpd(void)
+{
+ rv_register_monitor(&rv_scpd, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_scpd(void)
+{
+ rv_unregister_monitor(&rv_scpd);
+}
+
+module_init(register_scpd);
+module_exit(unregister_scpd);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("scpd: schedule called with preemption disabled.");
diff --git a/kernel/trace/rv/monitors/scpd/scpd.h b/kernel/trace/rv/monitors/scpd/scpd.h
new file mode 100644
index 000000000000..295f735a5811
--- /dev/null
+++ b/kernel/trace/rv/monitors/scpd/scpd.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of scpd automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_scpd {
+ cant_sched_scpd = 0,
+ can_sched_scpd,
+ state_max_scpd
+};
+
+#define INVALID_STATE state_max_scpd
+
+enum events_scpd {
+ preempt_disable_scpd = 0,
+ preempt_enable_scpd,
+ schedule_entry_scpd,
+ schedule_exit_scpd,
+ event_max_scpd
+};
+
+struct automaton_scpd {
+ char *state_names[state_max_scpd];
+ char *event_names[event_max_scpd];
+ unsigned char function[state_max_scpd][event_max_scpd];
+ unsigned char initial_state;
+ bool final_states[state_max_scpd];
+};
+
+static const struct automaton_scpd automaton_scpd = {
+ .state_names = {
+ "cant_sched",
+ "can_sched"
+ },
+ .event_names = {
+ "preempt_disable",
+ "preempt_enable",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ { can_sched_scpd, INVALID_STATE, INVALID_STATE, INVALID_STATE },
+ { INVALID_STATE, cant_sched_scpd, can_sched_scpd, can_sched_scpd },
+ },
+ .initial_state = cant_sched_scpd,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/scpd/scpd_trace.h b/kernel/trace/rv/monitors/scpd/scpd_trace.h
new file mode 100644
index 000000000000..6b0f4aa4732e
--- /dev/null
+++ b/kernel/trace/rv/monitors/scpd/scpd_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SCPD
+DEFINE_EVENT(event_da_monitor, event_scpd,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_scpd,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_SCPD */
diff --git a/kernel/trace/rv/monitors/sncid/Kconfig b/kernel/trace/rv/monitors/sncid/Kconfig
new file mode 100644
index 000000000000..76bcfef4fd10
--- /dev/null
+++ b/kernel/trace/rv/monitors/sncid/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SNCID
+ depends on RV
+ depends on IRQSOFF_TRACER
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "sncid monitor"
+ help
+ Monitor to ensure schedule is not called with interrupt disabled.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/sncid/sncid.c b/kernel/trace/rv/monitors/sncid/sncid.c
new file mode 100644
index 000000000000..f5037cd6214c
--- /dev/null
+++ b/kernel/trace/rv/monitors/sncid/sncid.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "sncid"
+
+#include <trace/events/sched.h>
+#include <trace/events/preemptirq.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "sncid.h"
+
+static struct rv_monitor rv_sncid;
+DECLARE_DA_MON_PER_CPU(sncid, unsigned char);
+
+static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_sncid(irq_disable_sncid);
+}
+
+static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_start_event_sncid(irq_enable_sncid);
+}
+
+static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+{
+ da_handle_start_event_sncid(schedule_entry_sncid);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+{
+ da_handle_start_event_sncid(schedule_exit_sncid);
+}
+
+static int enable_sncid(void)
+{
+ int retval;
+
+ retval = da_monitor_init_sncid();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("sncid", irq_disable, handle_irq_disable);
+ rv_attach_trace_probe("sncid", irq_enable, handle_irq_enable);
+ rv_attach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit);
+
+ return 0;
+}
+
+static void disable_sncid(void)
+{
+ rv_sncid.enabled = 0;
+
+ rv_detach_trace_probe("sncid", irq_disable, handle_irq_disable);
+ rv_detach_trace_probe("sncid", irq_enable, handle_irq_enable);
+ rv_detach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit);
+
+ da_monitor_destroy_sncid();
+}
+
+static struct rv_monitor rv_sncid = {
+ .name = "sncid",
+ .description = "schedule not called with interrupt disabled.",
+ .enable = enable_sncid,
+ .disable = disable_sncid,
+ .reset = da_monitor_reset_all_sncid,
+ .enabled = 0,
+};
+
+static int __init register_sncid(void)
+{
+ rv_register_monitor(&rv_sncid, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_sncid(void)
+{
+ rv_unregister_monitor(&rv_sncid);
+}
+
+module_init(register_sncid);
+module_exit(unregister_sncid);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("sncid: schedule not called with interrupt disabled.");
diff --git a/kernel/trace/rv/monitors/sncid/sncid.h b/kernel/trace/rv/monitors/sncid/sncid.h
new file mode 100644
index 000000000000..21304725142b
--- /dev/null
+++ b/kernel/trace/rv/monitors/sncid/sncid.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of sncid automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_sncid {
+ can_sched_sncid = 0,
+ cant_sched_sncid,
+ state_max_sncid
+};
+
+#define INVALID_STATE state_max_sncid
+
+enum events_sncid {
+ irq_disable_sncid = 0,
+ irq_enable_sncid,
+ schedule_entry_sncid,
+ schedule_exit_sncid,
+ event_max_sncid
+};
+
+struct automaton_sncid {
+ char *state_names[state_max_sncid];
+ char *event_names[event_max_sncid];
+ unsigned char function[state_max_sncid][event_max_sncid];
+ unsigned char initial_state;
+ bool final_states[state_max_sncid];
+};
+
+static const struct automaton_sncid automaton_sncid = {
+ .state_names = {
+ "can_sched",
+ "cant_sched"
+ },
+ .event_names = {
+ "irq_disable",
+ "irq_enable",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ { cant_sched_sncid, INVALID_STATE, can_sched_sncid, can_sched_sncid },
+ { INVALID_STATE, can_sched_sncid, INVALID_STATE, INVALID_STATE },
+ },
+ .initial_state = can_sched_sncid,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/sncid/sncid_trace.h b/kernel/trace/rv/monitors/sncid/sncid_trace.h
new file mode 100644
index 000000000000..3ce42a57671d
--- /dev/null
+++ b/kernel/trace/rv/monitors/sncid/sncid_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SNCID
+DEFINE_EVENT(event_da_monitor, event_sncid,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_sncid,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_SNCID */
diff --git a/kernel/trace/rv/monitors/snep/Kconfig b/kernel/trace/rv/monitors/snep/Kconfig
new file mode 100644
index 000000000000..77527f971232
--- /dev/null
+++ b/kernel/trace/rv/monitors/snep/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SNEP
+ depends on RV
+ depends on PREEMPT_TRACER
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "snep monitor"
+ help
+ Monitor to ensure schedule does not enable preempt.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/snep/snep.c b/kernel/trace/rv/monitors/snep/snep.c
new file mode 100644
index 000000000000..0076ba6d7ea4
--- /dev/null
+++ b/kernel/trace/rv/monitors/snep/snep.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "snep"
+
+#include <trace/events/sched.h>
+#include <trace/events/preemptirq.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "snep.h"
+
+static struct rv_monitor rv_snep;
+DECLARE_DA_MON_PER_CPU(snep, unsigned char);
+
+static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_start_event_snep(preempt_disable_snep);
+}
+
+static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_start_event_snep(preempt_enable_snep);
+}
+
+static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+{
+ da_handle_event_snep(schedule_entry_snep);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+{
+ da_handle_start_event_snep(schedule_exit_snep);
+}
+
+static int enable_snep(void)
+{
+ int retval;
+
+ retval = da_monitor_init_snep();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("snep", preempt_disable, handle_preempt_disable);
+ rv_attach_trace_probe("snep", preempt_enable, handle_preempt_enable);
+ rv_attach_trace_probe("snep", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("snep", sched_exit_tp, handle_schedule_exit);
+
+ return 0;
+}
+
+static void disable_snep(void)
+{
+ rv_snep.enabled = 0;
+
+ rv_detach_trace_probe("snep", preempt_disable, handle_preempt_disable);
+ rv_detach_trace_probe("snep", preempt_enable, handle_preempt_enable);
+ rv_detach_trace_probe("snep", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("snep", sched_exit_tp, handle_schedule_exit);
+
+ da_monitor_destroy_snep();
+}
+
+static struct rv_monitor rv_snep = {
+ .name = "snep",
+ .description = "schedule does not enable preempt.",
+ .enable = enable_snep,
+ .disable = disable_snep,
+ .reset = da_monitor_reset_all_snep,
+ .enabled = 0,
+};
+
+static int __init register_snep(void)
+{
+ rv_register_monitor(&rv_snep, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_snep(void)
+{
+ rv_unregister_monitor(&rv_snep);
+}
+
+module_init(register_snep);
+module_exit(unregister_snep);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("snep: schedule does not enable preempt.");
diff --git a/kernel/trace/rv/monitors/snep/snep.h b/kernel/trace/rv/monitors/snep/snep.h
new file mode 100644
index 000000000000..6d16b9ad931e
--- /dev/null
+++ b/kernel/trace/rv/monitors/snep/snep.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of snep automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_snep {
+ non_scheduling_context_snep = 0,
+ scheduling_contex_snep,
+ state_max_snep
+};
+
+#define INVALID_STATE state_max_snep
+
+enum events_snep {
+ preempt_disable_snep = 0,
+ preempt_enable_snep,
+ schedule_entry_snep,
+ schedule_exit_snep,
+ event_max_snep
+};
+
+struct automaton_snep {
+ char *state_names[state_max_snep];
+ char *event_names[event_max_snep];
+ unsigned char function[state_max_snep][event_max_snep];
+ unsigned char initial_state;
+ bool final_states[state_max_snep];
+};
+
+static const struct automaton_snep automaton_snep = {
+ .state_names = {
+ "non_scheduling_context",
+ "scheduling_contex"
+ },
+ .event_names = {
+ "preempt_disable",
+ "preempt_enable",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ { non_scheduling_context_snep, non_scheduling_context_snep, scheduling_contex_snep, INVALID_STATE },
+ { INVALID_STATE, INVALID_STATE, INVALID_STATE, non_scheduling_context_snep },
+ },
+ .initial_state = non_scheduling_context_snep,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/snep/snep_trace.h b/kernel/trace/rv/monitors/snep/snep_trace.h
new file mode 100644
index 000000000000..01aad49a949a
--- /dev/null
+++ b/kernel/trace/rv/monitors/snep/snep_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SNEP
+DEFINE_EVENT(event_da_monitor, event_snep,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_snep,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_SNEP */
diff --git a/kernel/trace/rv/monitors/snroc/Kconfig b/kernel/trace/rv/monitors/snroc/Kconfig
new file mode 100644
index 000000000000..6e4365a2fea3
--- /dev/null
+++ b/kernel/trace/rv/monitors/snroc/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SNROC
+ depends on RV
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_ID
+ bool "snroc monitor"
+ help
+ Monitor to ensure sched_set_state happens only in the respective task's context.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c
new file mode 100644
index 000000000000..bb1f60d55296
--- /dev/null
+++ b/kernel/trace/rv/monitors/snroc/snroc.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "snroc"
+
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "snroc.h"
+
+static struct rv_monitor rv_snroc;
+DECLARE_DA_MON_PER_TASK(snroc, unsigned char);
+
+static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+{
+ da_handle_event_snroc(tsk, sched_set_state_snroc);
+}
+
+static void handle_sched_switch(void *data, bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
+{
+ da_handle_start_event_snroc(prev, sched_switch_out_snroc);
+ da_handle_event_snroc(next, sched_switch_in_snroc);
+}
+
+static int enable_snroc(void)
+{
+ int retval;
+
+ retval = da_monitor_init_snroc();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("snroc", sched_set_state_tp, handle_sched_set_state);
+ rv_attach_trace_probe("snroc", sched_switch, handle_sched_switch);
+
+ return 0;
+}
+
+static void disable_snroc(void)
+{
+ rv_snroc.enabled = 0;
+
+ rv_detach_trace_probe("snroc", sched_set_state_tp, handle_sched_set_state);
+ rv_detach_trace_probe("snroc", sched_switch, handle_sched_switch);
+
+ da_monitor_destroy_snroc();
+}
+
+static struct rv_monitor rv_snroc = {
+ .name = "snroc",
+ .description = "set non runnable on its own context.",
+ .enable = enable_snroc,
+ .disable = disable_snroc,
+ .reset = da_monitor_reset_all_snroc,
+ .enabled = 0,
+};
+
+static int __init register_snroc(void)
+{
+ rv_register_monitor(&rv_snroc, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_snroc(void)
+{
+ rv_unregister_monitor(&rv_snroc);
+}
+
+module_init(register_snroc);
+module_exit(unregister_snroc);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("snroc: set non runnable on its own context.");
diff --git a/kernel/trace/rv/monitors/snroc/snroc.h b/kernel/trace/rv/monitors/snroc/snroc.h
new file mode 100644
index 000000000000..c3650a2b1b10
--- /dev/null
+++ b/kernel/trace/rv/monitors/snroc/snroc.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of snroc automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_snroc {
+ other_context_snroc = 0,
+ own_context_snroc,
+ state_max_snroc
+};
+
+#define INVALID_STATE state_max_snroc
+
+enum events_snroc {
+ sched_set_state_snroc = 0,
+ sched_switch_in_snroc,
+ sched_switch_out_snroc,
+ event_max_snroc
+};
+
+struct automaton_snroc {
+ char *state_names[state_max_snroc];
+ char *event_names[event_max_snroc];
+ unsigned char function[state_max_snroc][event_max_snroc];
+ unsigned char initial_state;
+ bool final_states[state_max_snroc];
+};
+
+static const struct automaton_snroc automaton_snroc = {
+ .state_names = {
+ "other_context",
+ "own_context"
+ },
+ .event_names = {
+ "sched_set_state",
+ "sched_switch_in",
+ "sched_switch_out"
+ },
+ .function = {
+ { INVALID_STATE, own_context_snroc, INVALID_STATE },
+ { own_context_snroc, INVALID_STATE, other_context_snroc },
+ },
+ .initial_state = other_context_snroc,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/snroc/snroc_trace.h b/kernel/trace/rv/monitors/snroc/snroc_trace.h
new file mode 100644
index 000000000000..50114cef5122
--- /dev/null
+++ b/kernel/trace/rv/monitors/snroc/snroc_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SNROC
+DEFINE_EVENT(event_da_monitor_id, event_snroc,
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(id, state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor_id, error_snroc,
+ TP_PROTO(int id, char *state, char *event),
+ TP_ARGS(id, state, event));
+#endif /* CONFIG_RV_MON_SNROC */
diff --git a/kernel/trace/rv/monitors/tss/Kconfig b/kernel/trace/rv/monitors/tss/Kconfig
new file mode 100644
index 000000000000..479f86f52e60
--- /dev/null
+++ b/kernel/trace/rv/monitors/tss/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_TSS
+ depends on RV
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "tss monitor"
+ help
+ Monitor to ensure sched_switch happens only in scheduling context.
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/tss/tss.c b/kernel/trace/rv/monitors/tss/tss.c
new file mode 100644
index 000000000000..542787e6524f
--- /dev/null
+++ b/kernel/trace/rv/monitors/tss/tss.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "tss"
+
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "tss.h"
+
+static struct rv_monitor rv_tss;
+DECLARE_DA_MON_PER_CPU(tss, unsigned char);
+
+static void handle_sched_switch(void *data, bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
+{
+ da_handle_event_tss(sched_switch_tss);
+}
+
+static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+{
+ da_handle_event_tss(schedule_entry_tss);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+{
+ da_handle_start_event_tss(schedule_exit_tss);
+}
+
+static int enable_tss(void)
+{
+ int retval;
+
+ retval = da_monitor_init_tss();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("tss", sched_switch, handle_sched_switch);
+ rv_attach_trace_probe("tss", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("tss", sched_exit_tp, handle_schedule_exit);
+
+ return 0;
+}
+
+static void disable_tss(void)
+{
+ rv_tss.enabled = 0;
+
+ rv_detach_trace_probe("tss", sched_switch, handle_sched_switch);
+ rv_detach_trace_probe("tss", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("tss", sched_exit_tp, handle_schedule_exit);
+
+ da_monitor_destroy_tss();
+}
+
+static struct rv_monitor rv_tss = {
+ .name = "tss",
+ .description = "task switch while scheduling.",
+ .enable = enable_tss,
+ .disable = disable_tss,
+ .reset = da_monitor_reset_all_tss,
+ .enabled = 0,
+};
+
+static int __init register_tss(void)
+{
+ rv_register_monitor(&rv_tss, &rv_sched);
+ return 0;
+}
+
+static void __exit unregister_tss(void)
+{
+ rv_unregister_monitor(&rv_tss);
+}
+
+module_init(register_tss);
+module_exit(unregister_tss);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("tss: task switch while scheduling.");
diff --git a/kernel/trace/rv/monitors/tss/tss.h b/kernel/trace/rv/monitors/tss/tss.h
new file mode 100644
index 000000000000..f0a36fda1b87
--- /dev/null
+++ b/kernel/trace/rv/monitors/tss/tss.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of tss automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_tss {
+ thread_tss = 0,
+ sched_tss,
+ state_max_tss
+};
+
+#define INVALID_STATE state_max_tss
+
+enum events_tss {
+ sched_switch_tss = 0,
+ schedule_entry_tss,
+ schedule_exit_tss,
+ event_max_tss
+};
+
+struct automaton_tss {
+ char *state_names[state_max_tss];
+ char *event_names[event_max_tss];
+ unsigned char function[state_max_tss][event_max_tss];
+ unsigned char initial_state;
+ bool final_states[state_max_tss];
+};
+
+static const struct automaton_tss automaton_tss = {
+ .state_names = {
+ "thread",
+ "sched"
+ },
+ .event_names = {
+ "sched_switch",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ { INVALID_STATE, sched_tss, INVALID_STATE },
+ { sched_tss, INVALID_STATE, thread_tss },
+ },
+ .initial_state = thread_tss,
+ .final_states = { 1, 0 },
+};
diff --git a/kernel/trace/rv/monitors/tss/tss_trace.h b/kernel/trace/rv/monitors/tss/tss_trace.h
new file mode 100644
index 000000000000..4619dbb50cc0
--- /dev/null
+++ b/kernel/trace/rv/monitors/tss/tss_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_TSS
+DEFINE_EVENT(event_da_monitor, event_tss,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_tss,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_TSS */
diff --git a/kernel/trace/rv/monitors/wip/Kconfig b/kernel/trace/rv/monitors/wip/Kconfig
index 3ef664b5cd90..e464b9294865 100644
--- a/kernel/trace/rv/monitors/wip/Kconfig
+++ b/kernel/trace/rv/monitors/wip/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
config RV_MON_WIP
depends on RV
depends on PREEMPT_TRACER
diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c
index db7389157c87..ed758fec8608 100644
--- a/kernel/trace/rv/monitors/wip/wip.c
+++ b/kernel/trace/rv/monitors/wip/wip.c
@@ -71,7 +71,7 @@ static struct rv_monitor rv_wip = {
static int __init register_wip(void)
{
- rv_register_monitor(&rv_wip);
+ rv_register_monitor(&rv_wip, NULL);
return 0;
}
diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h
index 2e373f2c65ed..c7193748bf36 100644
--- a/kernel/trace/rv/monitors/wip/wip.h
+++ b/kernel/trace/rv/monitors/wip/wip.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Automatically generated C representation of wip automaton
* For further information about this format, see kernel documentation:
diff --git a/kernel/trace/rv/monitors/wwnr/Kconfig b/kernel/trace/rv/monitors/wwnr/Kconfig
index ee741aa6d6b8..d3bfc20037db 100644
--- a/kernel/trace/rv/monitors/wwnr/Kconfig
+++ b/kernel/trace/rv/monitors/wwnr/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
config RV_MON_WWNR
depends on RV
select DA_MON_EVENTS_ID
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c
index 3b16994a9984..172f31c4b0f3 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.c
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.c
@@ -70,7 +70,7 @@ static struct rv_monitor rv_wwnr = {
static int __init register_wwnr(void)
{
- rv_register_monitor(&rv_wwnr);
+ rv_register_monitor(&rv_wwnr, NULL);
return 0;
}
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h
index d0d9c4b8121b..0a59d23edf61 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.h
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Automatically generated C representation of wwnr automaton
* For further information about this format, see kernel documentation:
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 8657fc8806e7..50344aa9f7f9 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -162,7 +162,7 @@ struct dentry *get_monitors_root(void)
/*
* Interface for the monitor register.
*/
-static LIST_HEAD(rv_monitors_list);
+LIST_HEAD(rv_monitors_list);
static int task_monitor_count;
static bool task_monitor_slots[RV_PER_TASK_MONITORS];
@@ -207,6 +207,30 @@ void rv_put_task_monitor_slot(int slot)
}
/*
+ * Monitors with a parent are nested,
+ * Monitors without a parent could be standalone or containers.
+ */
+bool rv_is_nested_monitor(struct rv_monitor_def *mdef)
+{
+ return mdef->parent != NULL;
+}
+
+/*
+ * We set our list to have nested monitors listed after their parent
+ * if a monitor has a child element its a container.
+ * Containers can be also identified based on their function pointers:
+ * as they are not real monitors they do not need function definitions
+ * for enable()/disable(). Use this condition to find empty containers.
+ * Keep both conditions in case we have some non-compliant containers.
+ */
+bool rv_is_container_monitor(struct rv_monitor_def *mdef)
+{
+ struct rv_monitor_def *next = list_next_entry(mdef, list);
+
+ return next->parent == mdef->monitor || !mdef->monitor->enable;
+}
+
+/*
* This section collects the monitor/ files and folders.
*/
static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count,
@@ -229,7 +253,8 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
if (mdef->monitor->enabled) {
mdef->monitor->enabled = 0;
- mdef->monitor->disable();
+ if (mdef->monitor->disable)
+ mdef->monitor->disable();
/*
* Wait for the execution of all events to finish.
@@ -243,6 +268,60 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
return 0;
}
+static void rv_disable_single(struct rv_monitor_def *mdef)
+{
+ __rv_disable_monitor(mdef, true);
+}
+
+static int rv_enable_single(struct rv_monitor_def *mdef)
+{
+ int retval;
+
+ lockdep_assert_held(&rv_interface_lock);
+
+ if (mdef->monitor->enabled)
+ return 0;
+
+ retval = mdef->monitor->enable();
+
+ if (!retval)
+ mdef->monitor->enabled = 1;
+
+ return retval;
+}
+
+static void rv_disable_container(struct rv_monitor_def *mdef)
+{
+ struct rv_monitor_def *p = mdef;
+ int enabled = 0;
+
+ list_for_each_entry_continue(p, &rv_monitors_list, list) {
+ if (p->parent != mdef->monitor)
+ break;
+ enabled += __rv_disable_monitor(p, false);
+ }
+ if (enabled)
+ tracepoint_synchronize_unregister();
+ mdef->monitor->enabled = 0;
+}
+
+static int rv_enable_container(struct rv_monitor_def *mdef)
+{
+ struct rv_monitor_def *p = mdef;
+ int retval = 0;
+
+ list_for_each_entry_continue(p, &rv_monitors_list, list) {
+ if (retval || p->parent != mdef->monitor)
+ break;
+ retval = rv_enable_single(p);
+ }
+ if (retval)
+ rv_disable_container(mdef);
+ else
+ mdef->monitor->enabled = 1;
+ return retval;
+}
+
/**
* rv_disable_monitor - disable a given runtime monitor
* @mdef: Pointer to the monitor definition structure.
@@ -251,7 +330,11 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
*/
int rv_disable_monitor(struct rv_monitor_def *mdef)
{
- __rv_disable_monitor(mdef, true);
+ if (rv_is_container_monitor(mdef))
+ rv_disable_container(mdef);
+ else
+ rv_disable_single(mdef);
+
return 0;
}
@@ -265,15 +348,10 @@ int rv_enable_monitor(struct rv_monitor_def *mdef)
{
int retval;
- lockdep_assert_held(&rv_interface_lock);
-
- if (mdef->monitor->enabled)
- return 0;
-
- retval = mdef->monitor->enable();
-
- if (!retval)
- mdef->monitor->enabled = 1;
+ if (rv_is_container_monitor(mdef))
+ retval = rv_enable_container(mdef);
+ else
+ retval = rv_enable_single(mdef);
return retval;
}
@@ -336,9 +414,9 @@ static const struct file_operations interface_desc_fops = {
* the monitor dir, where the specific options of the monitor
* are exposed.
*/
-static int create_monitor_dir(struct rv_monitor_def *mdef)
+static int create_monitor_dir(struct rv_monitor_def *mdef, struct rv_monitor_def *parent)
{
- struct dentry *root = get_monitors_root();
+ struct dentry *root = parent ? parent->root_d : get_monitors_root();
const char *name = mdef->monitor->name;
struct dentry *tmp;
int retval;
@@ -377,7 +455,11 @@ static int monitors_show(struct seq_file *m, void *p)
{
struct rv_monitor_def *mon_def = p;
- seq_printf(m, "%s\n", mon_def->monitor->name);
+ if (mon_def->parent)
+ seq_printf(m, "%s:%s\n", mon_def->parent->name,
+ mon_def->monitor->name);
+ else
+ seq_printf(m, "%s\n", mon_def->monitor->name);
return 0;
}
@@ -514,7 +596,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
struct rv_monitor_def *mdef;
int retval = -EINVAL;
bool enable = true;
- char *ptr;
+ char *ptr, *tmp;
int len;
if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1)
@@ -541,6 +623,11 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
retval = -EINVAL;
+ /* we support 1 nesting level, trim the parent */
+ tmp = strstr(ptr, ":");
+ if (tmp)
+ ptr = tmp+1;
+
list_for_each_entry(mdef, &rv_monitors_list, list) {
if (strcmp(ptr, mdef->monitor->name) != 0)
continue;
@@ -613,7 +700,7 @@ static void reset_all_monitors(void)
struct rv_monitor_def *mdef;
list_for_each_entry(mdef, &rv_monitors_list, list) {
- if (mdef->monitor->enabled)
+ if (mdef->monitor->enabled && mdef->monitor->reset)
mdef->monitor->reset();
}
}
@@ -685,18 +772,19 @@ static void destroy_monitor_dir(struct rv_monitor_def *mdef)
/**
* rv_register_monitor - register a rv monitor.
* @monitor: The rv_monitor to be registered.
+ * @parent: The parent of the monitor to be registered, NULL if not nested.
*
* Returns 0 if successful, error otherwise.
*/
-int rv_register_monitor(struct rv_monitor *monitor)
+int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
{
- struct rv_monitor_def *r;
+ struct rv_monitor_def *r, *p = NULL;
int retval = 0;
if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) {
pr_info("Monitor %s has a name longer than %d\n", monitor->name,
MAX_RV_MONITOR_NAME_SIZE);
- return -1;
+ return -EINVAL;
}
mutex_lock(&rv_interface_lock);
@@ -704,11 +792,26 @@ int rv_register_monitor(struct rv_monitor *monitor)
list_for_each_entry(r, &rv_monitors_list, list) {
if (strcmp(monitor->name, r->monitor->name) == 0) {
pr_info("Monitor %s is already registered\n", monitor->name);
- retval = -1;
+ retval = -EEXIST;
goto out_unlock;
}
}
+ if (parent) {
+ list_for_each_entry(r, &rv_monitors_list, list) {
+ if (strcmp(parent->name, r->monitor->name) == 0) {
+ p = r;
+ break;
+ }
+ }
+ }
+
+ if (p && rv_is_nested_monitor(p)) {
+ pr_info("Parent monitor %s is already nested, cannot nest further\n",
+ parent->name);
+ return -EINVAL;
+ }
+
r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL);
if (!r) {
retval = -ENOMEM;
@@ -716,14 +819,19 @@ int rv_register_monitor(struct rv_monitor *monitor)
}
r->monitor = monitor;
+ r->parent = parent;
- retval = create_monitor_dir(r);
+ retval = create_monitor_dir(r, p);
if (retval) {
kfree(r);
goto out_unlock;
}
- list_add_tail(&r->list, &rv_monitors_list);
+ /* keep children close to the parent for easier visualisation */
+ if (p)
+ list_add(&r->list, &p->list);
+ else
+ list_add_tail(&r->list, &rv_monitors_list);
out_unlock:
mutex_unlock(&rv_interface_lock);
diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h
index db6cb0913dbd..98fca0a1adbc 100644
--- a/kernel/trace/rv/rv.h
+++ b/kernel/trace/rv/rv.h
@@ -21,6 +21,7 @@ struct rv_interface {
#define MAX_RV_REACTOR_NAME_SIZE 32
extern struct mutex rv_interface_lock;
+extern struct list_head rv_monitors_list;
#ifdef CONFIG_RV_REACTORS
struct rv_reactor_def {
@@ -34,6 +35,7 @@ struct rv_reactor_def {
struct rv_monitor_def {
struct list_head list;
struct rv_monitor *monitor;
+ struct rv_monitor *parent;
struct dentry *root_d;
#ifdef CONFIG_RV_REACTORS
struct rv_reactor_def *rdef;
@@ -45,6 +47,8 @@ struct rv_monitor_def {
struct dentry *get_monitors_root(void);
int rv_disable_monitor(struct rv_monitor_def *mdef);
int rv_enable_monitor(struct rv_monitor_def *mdef);
+bool rv_is_container_monitor(struct rv_monitor_def *mdef);
+bool rv_is_nested_monitor(struct rv_monitor_def *mdef);
#ifdef CONFIG_RV_REACTORS
int reactor_populate_monitor(struct rv_monitor_def *mdef);
diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c
index 7b49cbe388d4..9501ca886d83 100644
--- a/kernel/trace/rv/rv_reactors.c
+++ b/kernel/trace/rv/rv_reactors.c
@@ -158,8 +158,9 @@ static const struct seq_operations monitor_reactors_seq_ops = {
.show = monitor_reactor_show
};
-static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor_def *rdef,
- bool reacting)
+static void monitor_swap_reactors_single(struct rv_monitor_def *mdef,
+ struct rv_reactor_def *rdef,
+ bool reacting, bool nested)
{
bool monitor_enabled;
@@ -179,10 +180,31 @@ static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor
mdef->reacting = reacting;
mdef->monitor->react = rdef->reactor->react;
- if (monitor_enabled)
+ /* enable only once if iterating through a container */
+ if (monitor_enabled && !nested)
rv_enable_monitor(mdef);
}
+static void monitor_swap_reactors(struct rv_monitor_def *mdef,
+ struct rv_reactor_def *rdef, bool reacting)
+{
+ struct rv_monitor_def *p = mdef;
+
+ if (rv_is_container_monitor(mdef))
+ list_for_each_entry_continue(p, &rv_monitors_list, list) {
+ if (p->parent != mdef->monitor)
+ break;
+ monitor_swap_reactors_single(p, rdef, reacting, true);
+ }
+ /*
+ * This call enables and disables the monitor if they were active.
+ * In case of a container, we already disabled all and will enable all.
+ * All nested monitors are enabled also if they were off, we may refine
+ * this logic in the future.
+ */
+ monitor_swap_reactors_single(mdef, rdef, reacting, false);
+}
+
static ssize_t
monitor_reactors_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 5e65097423ba..422b75f58891 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -58,6 +58,11 @@ DECLARE_EVENT_CLASS(error_da_monitor,
);
#include <monitors/wip/wip_trace.h>
+#include <monitors/tss/tss_trace.h>
+#include <monitors/sco/sco_trace.h>
+#include <monitors/scpd/scpd_trace.h>
+#include <monitors/snep/snep_trace.h>
+#include <monitors/sncid/sncid_trace.h>
// Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here
#endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */
@@ -118,6 +123,7 @@ DECLARE_EVENT_CLASS(error_da_monitor_id,
);
#include <monitors/wwnr/wwnr_trace.h>
+#include <monitors/snroc/snroc_trace.h>
// Add new monitors based on CONFIG_DA_MON_EVENTS_ID here
#endif /* CONFIG_DA_MON_EVENTS_ID */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd3cb2b2ab82..826267f5b650 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,6 +87,7 @@ void __init disable_tracing_selftest(const char *reason)
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
+static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
@@ -330,6 +331,13 @@ static int __init set_tracepoint_printk_stop(char *str)
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
+static int __init set_traceoff_after_boot(char *str)
+{
+ traceoff_after_boot = true;
+ return 1;
+}
+__setup("traceoff_after_boot", set_traceoff_after_boot);
+
unsigned long long ns2usecs(u64 nsec)
{
nsec += 500;
@@ -2878,13 +2886,16 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
- parent_ip, unsigned int trace_ctx)
+ parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
+ int size = sizeof(*entry);
+
+ size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
- event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
+ event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
trace_ctx);
if (!event)
return;
@@ -2892,6 +2903,13 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long
entry->ip = ip;
entry->parent_ip = parent_ip;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ if (fregs) {
+ for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
+ entry->args[i] = ftrace_regs_get_argument(fregs, i);
+ }
+#endif
+
if (static_branch_unlikely(&trace_function_exports_enabled))
ftrace_exports(event, TRACE_EXPORT_FUNCTION);
__buffer_unlock_commit(buffer, event);
@@ -10699,6 +10717,9 @@ __init static int late_trace_init(void)
tracepoint_printk = 0;
}
+ if (traceoff_after_boot)
+ tracing_off();
+
tracing_set_default_clock();
clear_boot_tracer();
return 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9c21ba45b7af..4a6621e2a0fa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -21,6 +21,7 @@
#include <linux/workqueue.h>
#include <linux/ctype.h>
#include <linux/once_lite.h>
+#include <linux/ftrace_regs.h>
#include "pid_list.h"
@@ -697,7 +698,8 @@ unsigned long trace_total_entries(struct trace_array *tr);
void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
- unsigned int trace_ctx);
+ unsigned int trace_ctx,
+ struct ftrace_regs *regs);
void trace_graph_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
@@ -897,6 +899,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_RETVAL 0x800
#define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000
#define TRACE_GRAPH_PRINT_RETADDR 0x2000
+#define TRACE_GRAPH_ARGS 0x4000
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
@@ -1714,7 +1717,7 @@ struct event_trigger_data {
unsigned long count;
int ref;
int flags;
- struct event_trigger_ops *ops;
+ const struct event_trigger_ops *ops;
struct event_command *cmd_ops;
struct event_filter __rcu *filter;
char *filter_str;
@@ -1959,7 +1962,7 @@ struct event_command {
int (*set_filter)(char *filter_str,
struct event_trigger_data *data,
struct trace_event_file *file);
- struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
+ const struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
};
/**
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index fbfb396905a6..ee40d4e6ad1c 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -61,8 +61,9 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
TRACE_FN,
F_STRUCT(
- __field_fn( unsigned long, ip )
- __field_fn( unsigned long, parent_ip )
+ __field_fn( unsigned long, ip )
+ __field_fn( unsigned long, parent_ip )
+ __dynamic_array( unsigned long, args )
),
F_printk(" %ps <-- %ps",
@@ -72,17 +73,18 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
);
/* Function call entry */
-FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
+FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
TRACE_GRAPH_ENT,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
__field_packed( unsigned long, graph_ent, func )
- __field_packed( int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, depth )
+ __dynamic_array(unsigned long, args )
),
- F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
+ F_printk("--> %ps (%lu)", (void *)__entry->func, __entry->depth)
);
#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 82fd637cfc19..c08355c3ef32 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -478,7 +478,7 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
__eprobe_trace_func(edata, rec);
}
-static struct event_trigger_ops eprobe_trigger_ops = {
+static const struct event_trigger_ops eprobe_trigger_ops = {
.trigger = eprobe_trigger_func,
.print = eprobe_trigger_print,
.init = eprobe_trigger_init,
@@ -507,8 +507,8 @@ static void eprobe_trigger_unreg_func(char *glob,
}
-static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
- char *param)
+static const struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
+ char *param)
{
return &eprobe_trigger_ops;
}
@@ -913,6 +913,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
}
if (argc - 2 > MAX_TRACE_ARGS) {
+ trace_probe_log_set_index(2);
+ trace_probe_log_err(0, TOO_MANY_ARGS);
ret = -E2BIG;
goto error;
}
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 3ff9caa4a71b..a6bb7577e8c5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -49,7 +49,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event)) {
- ret = perf_allow_tracepoint(&p_event->attr);
+ ret = perf_allow_tracepoint();
if (ret)
return ret;
@@ -86,7 +86,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
* ...otherwise raw tracepoint data can be a severe data leak,
* only allow root to have these.
*/
- ret = perf_allow_tracepoint(&p_event->attr);
+ ret = perf_allow_tracepoint();
if (ret)
return ret;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 513de9ceb80e..8e7603acca21 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -790,7 +790,9 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
}
- call->class->reg(call, TRACE_REG_UNREGISTER, file);
+ ret = call->class->reg(call, TRACE_REG_UNREGISTER, file);
+
+ WARN_ON_ONCE(ret);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
if (file->flags & EVENT_FILE_FL_SOFT_MODE)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 53dc6719181e..1260c23cfa5f 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -6203,7 +6203,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data)
}
}
-static struct event_trigger_ops event_hist_trigger_ops = {
+static const struct event_trigger_ops event_hist_trigger_ops = {
.trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_init,
@@ -6235,15 +6235,15 @@ static void event_hist_trigger_named_free(struct event_trigger_data *data)
}
}
-static struct event_trigger_ops event_hist_trigger_named_ops = {
+static const struct event_trigger_ops event_hist_trigger_named_ops = {
.trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_named_init,
.free = event_hist_trigger_named_free,
};
-static struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd,
- char *param)
+static const struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd,
+ char *param)
{
return &event_hist_trigger_ops;
}
@@ -6838,38 +6838,38 @@ hist_enable_count_trigger(struct event_trigger_data *data,
hist_enable_trigger(data, buffer, rec, event);
}
-static struct event_trigger_ops hist_enable_trigger_ops = {
+static const struct event_trigger_ops hist_enable_trigger_ops = {
.trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops hist_enable_count_trigger_ops = {
+static const struct event_trigger_ops hist_enable_count_trigger_ops = {
.trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops hist_disable_trigger_ops = {
+static const struct event_trigger_ops hist_disable_trigger_ops = {
.trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops hist_disable_count_trigger_ops = {
+static const struct event_trigger_ops hist_disable_count_trigger_ops = {
.trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops *
+static const struct event_trigger_ops *
hist_enable_get_trigger_ops(char *cmd, char *param)
{
- struct event_trigger_ops *ops;
+ const struct event_trigger_ops *ops;
bool enable;
enable = (strcmp(cmd, ENABLE_HIST_STR) == 0);
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index e3f7d09e5512..969f48742d72 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -207,7 +207,7 @@ static int synth_field_string_size(char *type)
if (len == 0)
return 0; /* variable-length string */
- strncpy(buf, start, len);
+ memcpy(buf, start, len);
buf[len] = '\0';
err = kstrtouint(buf, 0, &size);
@@ -305,7 +305,7 @@ static const char *synth_field_fmt(char *type)
else if (strcmp(type, "gfp_t") == 0)
fmt = "%x";
else if (synth_field_is_string(type))
- fmt = "%.*s";
+ fmt = "%s";
else if (synth_field_is_stack(type))
fmt = "%s";
@@ -612,7 +612,7 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
fmt = synth_field_fmt(event->fields[i]->type);
pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s",
event->fields[i]->name, fmt,
- i == event->n_fields - 1 ? "" : ", ");
+ i == event->n_fields - 1 ? "" : " ");
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
@@ -852,6 +852,38 @@ static struct trace_event_fields synth_event_fields_array[] = {
{}
};
+static int synth_event_reg(struct trace_event_call *call,
+ enum trace_reg type, void *data)
+{
+ struct synth_event *event = container_of(call, struct synth_event, call);
+
+ switch (type) {
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+#endif
+ case TRACE_REG_REGISTER:
+ if (!try_module_get(event->mod))
+ return -EBUSY;
+ break;
+ default:
+ break;
+ }
+
+ int ret = trace_event_reg(call, type, data);
+
+ switch (type) {
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_UNREGISTER:
+#endif
+ case TRACE_REG_UNREGISTER:
+ module_put(event->mod);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
static int register_synth_event(struct synth_event *event)
{
struct trace_event_call *call = &event->call;
@@ -881,7 +913,7 @@ static int register_synth_event(struct synth_event *event)
goto out;
}
call->flags = TRACE_EVENT_FL_TRACEPOINT;
- call->class->reg = trace_event_reg;
+ call->class->reg = synth_event_reg;
call->class->probe = trace_event_raw_event_synth;
call->data = event;
call->tp = event->tp;
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d45448947094..b66b6d235d91 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -825,7 +825,7 @@ struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops,
void *private_data)
{
struct event_trigger_data *trigger_data;
- struct event_trigger_ops *trigger_ops;
+ const struct event_trigger_ops *trigger_ops;
trigger_ops = cmd_ops->get_trigger_ops(cmd, param);
@@ -1367,38 +1367,38 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static struct event_trigger_ops traceon_trigger_ops = {
+static const struct event_trigger_ops traceon_trigger_ops = {
.trigger = traceon_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops traceon_count_trigger_ops = {
+static const struct event_trigger_ops traceon_count_trigger_ops = {
.trigger = traceon_count_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops traceoff_trigger_ops = {
+static const struct event_trigger_ops traceoff_trigger_ops = {
.trigger = traceoff_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops traceoff_count_trigger_ops = {
+static const struct event_trigger_ops traceoff_count_trigger_ops = {
.trigger = traceoff_count_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops *
+static const struct event_trigger_ops *
onoff_get_trigger_ops(char *cmd, char *param)
{
- struct event_trigger_ops *ops;
+ const struct event_trigger_ops *ops;
/* we register both traceon and traceoff to this callback */
if (strcmp(cmd, "traceon") == 0)
@@ -1491,21 +1491,21 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static struct event_trigger_ops snapshot_trigger_ops = {
+static const struct event_trigger_ops snapshot_trigger_ops = {
.trigger = snapshot_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops snapshot_count_trigger_ops = {
+static const struct event_trigger_ops snapshot_count_trigger_ops = {
.trigger = snapshot_count_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops *
+static const struct event_trigger_ops *
snapshot_get_trigger_ops(char *cmd, char *param)
{
return param ? &snapshot_count_trigger_ops : &snapshot_trigger_ops;
@@ -1586,21 +1586,21 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
data->filter_str);
}
-static struct event_trigger_ops stacktrace_trigger_ops = {
+static const struct event_trigger_ops stacktrace_trigger_ops = {
.trigger = stacktrace_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops stacktrace_count_trigger_ops = {
+static const struct event_trigger_ops stacktrace_count_trigger_ops = {
.trigger = stacktrace_count_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
-static struct event_trigger_ops *
+static const struct event_trigger_ops *
stacktrace_get_trigger_ops(char *cmd, char *param)
{
return param ? &stacktrace_count_trigger_ops : &stacktrace_trigger_ops;
@@ -1711,28 +1711,28 @@ void event_enable_trigger_free(struct event_trigger_data *data)
}
}
-static struct event_trigger_ops event_enable_trigger_ops = {
+static const struct event_trigger_ops event_enable_trigger_ops = {
.trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops event_enable_count_trigger_ops = {
+static const struct event_trigger_ops event_enable_count_trigger_ops = {
.trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops event_disable_trigger_ops = {
+static const struct event_trigger_ops event_disable_trigger_ops = {
.trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-static struct event_trigger_ops event_disable_count_trigger_ops = {
+static const struct event_trigger_ops event_disable_count_trigger_ops = {
.trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
@@ -1916,10 +1916,10 @@ void event_enable_unregister_trigger(char *glob,
data->ops->free(data);
}
-static struct event_trigger_ops *
+static const struct event_trigger_ops *
event_enable_get_trigger_ops(char *cmd, char *param)
{
- struct event_trigger_ops *ops;
+ const struct event_trigger_ops *ops;
bool enable;
#ifdef CONFIG_HIST_TRIGGERS
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 97325fbd6283..af42aaa3d172 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -455,7 +455,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work)
if (ret && ret != -ENOENT) {
struct user_event *user = enabler->event;
- pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n",
+ pr_warn("user_events: Fault for mm: 0x%p @ 0x%llx event: %s\n",
mm->mm, (unsigned long long)uaddr, EVENT_NAME(user));
}
@@ -2793,11 +2793,8 @@ static int user_seq_show(struct seq_file *m, void *p)
seq_printf(m, "%s", EVENT_TP_NAME(user));
- if (status != 0)
- seq_puts(m, " #");
-
if (status != 0) {
- seq_puts(m, " Used by");
+ seq_puts(m, " # Used by");
if (status & EVENT_STATUS_FTRACE)
seq_puts(m, " ftrace");
if (status & EVENT_STATUS_PERF)
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 985ff98272da..5d7ca80173ea 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -1199,8 +1199,11 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
argc = new_argc;
argv = new_argv;
}
- if (argc > MAX_TRACE_ARGS)
+ if (argc > MAX_TRACE_ARGS) {
+ trace_probe_log_set_index(2);
+ trace_probe_log_err(0, TOO_MANY_ARGS);
return -E2BIG;
+ }
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index df56f9b76010..98ccf3f00c51 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -25,6 +25,9 @@ static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
static void
+function_args_trace_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
static void
@@ -42,9 +45,10 @@ enum {
TRACE_FUNC_NO_OPTS = 0x0, /* No flags set. */
TRACE_FUNC_OPT_STACK = 0x1,
TRACE_FUNC_OPT_NO_REPEATS = 0x2,
+ TRACE_FUNC_OPT_ARGS = 0x4,
/* Update this to next highest bit. */
- TRACE_FUNC_OPT_HIGHEST_BIT = 0x4
+ TRACE_FUNC_OPT_HIGHEST_BIT = 0x8
};
#define TRACE_FUNC_OPT_MASK (TRACE_FUNC_OPT_HIGHEST_BIT - 1)
@@ -114,6 +118,8 @@ static ftrace_func_t select_trace_function(u32 flags_val)
switch (flags_val & TRACE_FUNC_OPT_MASK) {
case TRACE_FUNC_NO_OPTS:
return function_trace_call;
+ case TRACE_FUNC_OPT_ARGS:
+ return function_args_trace_call;
case TRACE_FUNC_OPT_STACK:
return function_stack_trace_call;
case TRACE_FUNC_OPT_NO_REPEATS:
@@ -220,7 +226,34 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
data = this_cpu_ptr(tr->array_buffer.data);
if (!atomic_read(&data->disabled))
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
+
+ ftrace_test_recursion_unlock(bit);
+}
+
+static void
+function_args_trace_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ struct trace_array *tr = op->private;
+ struct trace_array_cpu *data;
+ unsigned int trace_ctx;
+ int bit;
+ int cpu;
+
+ if (unlikely(!tr->function_enabled))
+ return;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ trace_ctx = tracing_gen_ctx();
+
+ cpu = smp_processor_id();
+ data = per_cpu_ptr(tr->array_buffer.data, cpu);
+ if (!atomic_read(&data->disabled))
+ trace_function(tr, ip, parent_ip, trace_ctx, fregs);
ftrace_test_recursion_unlock(bit);
}
@@ -270,7 +303,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
#ifdef CONFIG_UNWINDER_FRAME_POINTER
if (ftrace_pids_enabled(op))
skip++;
@@ -349,7 +382,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
trace_ctx = tracing_gen_ctx_dec();
process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
out:
ftrace_test_recursion_unlock(bit);
@@ -389,7 +422,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
trace_ctx = tracing_gen_ctx_flags(flags);
process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
__trace_stack(tr, trace_ctx, STACK_SKIP);
}
@@ -403,6 +436,9 @@ static struct tracer_opt func_opts[] = {
{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
#endif
{ TRACER_OPT(func-no-repeats, TRACE_FUNC_OPT_NO_REPEATS) },
+#ifdef CONFIG_FUNCTION_TRACE_ARGS
+ { TRACER_OPT(func-args, TRACE_FUNC_OPT_ARGS) },
+#endif
{ } /* Always set a last empty entry */
};
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 136c750b0b4d..2f077d4158e5 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -71,6 +71,10 @@ static struct tracer_opt trace_opts[] = {
/* Display function return address ? */
{ TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
#endif
+#ifdef CONFIG_FUNCTION_TRACE_ARGS
+ /* Display function arguments ? */
+ { TRACER_OPT(funcgraph-args, TRACE_GRAPH_ARGS) },
+#endif
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
@@ -110,25 +114,43 @@ static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags);
-int __trace_graph_entry(struct trace_array *tr,
- struct ftrace_graph_ent *trace,
- unsigned int trace_ctx)
+static int __graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx, struct ftrace_regs *fregs)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ftrace_graph_ent_entry *entry;
+ int size;
- event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
- sizeof(*entry), trace_ctx);
+ /* If fregs is defined, add FTRACE_REGS_MAX_ARGS long size words */
+ size = sizeof(*entry) + (FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long));
+
+ event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, size, trace_ctx);
if (!event)
return 0;
- entry = ring_buffer_event_data(event);
- entry->graph_ent = *trace;
+
+ entry = ring_buffer_event_data(event);
+ entry->graph_ent = *trace;
+
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ if (fregs) {
+ for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
+ entry->args[i] = ftrace_regs_get_argument(fregs, i);
+ }
+#endif
+
trace_buffer_unlock_commit_nostack(buffer, event);
return 1;
}
+int __trace_graph_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx)
+{
+ return __graph_entry(tr, trace, trace_ctx, NULL);
+}
+
#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
int __trace_graph_retaddr_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
@@ -174,9 +196,9 @@ struct fgraph_times {
unsigned long long sleeptime; /* may be optional! */
};
-int trace_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops,
- struct ftrace_regs *fregs)
+static int graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
@@ -246,7 +268,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
unsigned long retaddr = ftrace_graph_top_ret_addr(current);
ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
} else {
- ret = __trace_graph_entry(tr, trace, trace_ctx);
+ ret = __graph_entry(tr, trace, trace_ctx, fregs);
}
}
preempt_enable_notrace();
@@ -254,6 +276,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
return ret;
}
+int trace_graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
+{
+ return graph_entry(trace, gops, NULL);
+}
+
+static int trace_graph_entry_args(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
+{
+ return graph_entry(trace, gops, fregs);
+}
+
static void
__trace_graph_function(struct trace_array *tr,
unsigned long ip, unsigned int trace_ctx)
@@ -418,14 +454,17 @@ static int graph_trace_init(struct trace_array *tr)
{
int ret;
- tr->gops->entryfunc = trace_graph_entry;
+ if (tracer_flags_is_set(TRACE_GRAPH_ARGS))
+ tr->gops->entryfunc = trace_graph_entry_args;
+ else
+ tr->gops->entryfunc = trace_graph_entry;
if (tracing_thresh)
tr->gops->retfunc = trace_graph_thresh_return;
else
tr->gops->retfunc = trace_graph_return;
- /* Make gops functions are visible before we start tracing */
+ /* Make gops functions visible before we start tracing */
smp_mb();
ret = register_ftrace_graph(tr->gops);
@@ -436,6 +475,28 @@ static int graph_trace_init(struct trace_array *tr)
return 0;
}
+static int ftrace_graph_trace_args(struct trace_array *tr, int set)
+{
+ trace_func_graph_ent_t entry;
+
+ if (set)
+ entry = trace_graph_entry_args;
+ else
+ entry = trace_graph_entry;
+
+ /* See if there's any changes */
+ if (tr->gops->entryfunc == entry)
+ return 0;
+
+ unregister_ftrace_graph(tr->gops);
+
+ tr->gops->entryfunc = entry;
+
+ /* Make gops functions visible before we start tracing */
+ smp_mb();
+ return register_ftrace_graph(tr->gops);
+}
+
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
@@ -775,7 +836,7 @@ static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_e
static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry,
struct ftrace_graph_ret *graph_ret, void *func,
- u32 opt_flags, u32 trace_flags)
+ u32 opt_flags, u32 trace_flags, int args_size)
{
unsigned long err_code = 0;
unsigned long retval = 0;
@@ -809,7 +870,14 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr
if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT)
print_retaddr = false;
- trace_seq_printf(s, "%ps();", func);
+ trace_seq_printf(s, "%ps", func);
+
+ if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) {
+ print_function_args(s, entry->args, (unsigned long)func);
+ trace_seq_putc(s, ';');
+ } else
+ trace_seq_puts(s, "();");
+
if (print_retval || print_retaddr)
trace_seq_puts(s, " /*");
else
@@ -836,7 +904,8 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr
#else
-#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0)
+#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags, args_size) \
+ do {} while (0)
#endif
@@ -852,16 +921,17 @@ print_graph_entry_leaf(struct trace_iterator *iter,
struct ftrace_graph_ret *graph_ret;
struct ftrace_graph_ent *call;
unsigned long long duration;
- unsigned long func;
+ unsigned long ret_func;
+ int args_size;
int cpu = iter->cpu;
int i;
+ args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args);
+
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
duration = ret_entry->rettime - ret_entry->calltime;
- func = call->func + iter->tr->text_delta;
-
if (data) {
struct fgraph_cpu_data *cpu_data;
@@ -887,16 +957,25 @@ print_graph_entry_leaf(struct trace_iterator *iter,
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
trace_seq_putc(s, ' ');
+ ret_func = graph_ret->func + iter->tr->text_delta;
+
/*
* Write out the function return value or return address
*/
if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
print_graph_retval(s, entry, graph_ret,
(void *)graph_ret->func + iter->tr->text_delta,
- flags, tr->trace_flags);
+ flags, tr->trace_flags, args_size);
} else {
- trace_seq_printf(s, "%ps();\n", (void *)func);
+ trace_seq_printf(s, "%ps", (void *)ret_func);
+
+ if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) {
+ print_function_args(s, entry->args, ret_func);
+ trace_seq_putc(s, ';');
+ } else
+ trace_seq_puts(s, "();");
}
+ trace_seq_printf(s, "\n");
print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -913,6 +992,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
struct fgraph_data *data = iter->private;
struct trace_array *tr = iter->tr;
unsigned long func;
+ int args_size;
int i;
if (data) {
@@ -937,7 +1017,17 @@ print_graph_entry_nested(struct trace_iterator *iter,
func = call->func + iter->tr->text_delta;
- trace_seq_printf(s, "%ps() {", (void *)func);
+ trace_seq_printf(s, "%ps", (void *)func);
+
+ args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args);
+
+ if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long))
+ print_function_args(s, entry->args, func);
+ else
+ trace_seq_puts(s, "()");
+
+ trace_seq_puts(s, " {");
+
if (flags & __TRACE_GRAPH_PRINT_RETADDR &&
entry->ent.type == TRACE_GRAPH_RETADDR_ENT)
print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry,
@@ -1107,21 +1197,38 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
struct trace_iterator *iter, u32 flags)
{
struct fgraph_data *data = iter->private;
- struct ftrace_graph_ent *call = &field->graph_ent;
+ struct ftrace_graph_ent *call;
struct ftrace_graph_ret_entry *leaf_ret;
static enum print_line_t ret;
int cpu = iter->cpu;
+ /*
+ * print_graph_entry() may consume the current event,
+ * thus @field may become invalid, so we need to save it.
+ * sizeof(struct ftrace_graph_ent_entry) is very small,
+ * it can be safely saved at the stack.
+ */
+ struct ftrace_graph_ent_entry *entry;
+ u8 save_buf[sizeof(*entry) + FTRACE_REGS_MAX_ARGS * sizeof(long)];
+
+ /* The ent_size is expected to be as big as the entry */
+ if (iter->ent_size > sizeof(save_buf))
+ iter->ent_size = sizeof(save_buf);
+
+ entry = (void *)save_buf;
+ memcpy(entry, field, iter->ent_size);
+
+ call = &entry->graph_ent;
if (check_irq_entry(iter, flags, call->func, call->depth))
return TRACE_TYPE_HANDLED;
print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags);
- leaf_ret = get_return_for_leaf(iter, field);
+ leaf_ret = get_return_for_leaf(iter, entry);
if (leaf_ret)
- ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
+ ret = print_graph_entry_leaf(iter, entry, leaf_ret, s, flags);
else
- ret = print_graph_entry_nested(iter, field, s, cpu, flags);
+ ret = print_graph_entry_nested(iter, entry, s, cpu, flags);
if (data) {
/*
@@ -1195,7 +1302,8 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
* funcgraph-retval option is enabled.
*/
if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
- print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags);
+ print_graph_retval(s, NULL, trace, (void *)func, flags,
+ tr->trace_flags, 0);
} else {
/*
* If the return function does not have a matching entry,
@@ -1323,16 +1431,8 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
switch (entry->type) {
case TRACE_GRAPH_ENT: {
- /*
- * print_graph_entry() may consume the current event,
- * thus @field may become invalid, so we need to save it.
- * sizeof(struct ftrace_graph_ent_entry) is very small,
- * it can be safely saved at the stack.
- */
- struct ftrace_graph_ent_entry saved;
trace_assign_type(field, entry);
- saved = *field;
- return print_graph_entry(&saved, s, iter, flags);
+ return print_graph_entry(field, s, iter, flags);
}
#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
case TRACE_GRAPH_RETADDR_ENT: {
@@ -1511,6 +1611,7 @@ void graph_trace_close(struct trace_iterator *iter)
if (data) {
free_percpu(data->cpu_data);
kfree(data);
+ iter->private = NULL;
}
}
@@ -1526,6 +1627,9 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
if (bit == TRACE_GRAPH_GRAPH_TIME)
ftrace_graph_graph_time_control(set);
+ if (bit == TRACE_GRAPH_ARGS)
+ return ftrace_graph_trace_args(tr, set);
+
return 0;
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7294ad676379..40c39e946940 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -150,7 +150,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
trace_ctx = tracing_gen_ctx_flags(flags);
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, fregs);
atomic_dec(&data->disabled);
}
@@ -250,8 +250,6 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
- else
- iter->private = NULL;
}
static void irqsoff_trace_close(struct trace_iterator *iter)
@@ -295,11 +293,17 @@ __trace_function(struct trace_array *tr,
if (is_graph(tr))
trace_graph_function(tr, ip, parent_ip, trace_ctx);
else
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
}
#else
-#define __trace_function trace_function
+static inline void
+__trace_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned int trace_ctx)
+{
+ return trace_function(tr, ip, parent_ip, trace_ctx, NULL);
+}
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d8d5f18a141a..8287b175667f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1007,8 +1007,11 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
argc = new_argc;
argv = new_argv;
}
- if (argc > MAX_TRACE_ARGS)
+ if (argc > MAX_TRACE_ARGS) {
+ trace_probe_log_set_index(2);
+ trace_probe_log_err(0, TOO_MANY_ARGS);
return -E2BIG;
+ }
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 92e16f03fa4e..e732c9e37e14 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -316,33 +316,6 @@ static inline void osn_var_reset_all(void)
bool trace_osnoise_callback_enabled;
/*
- * osnoise sample structure definition. Used to store the statistics of a
- * sample run.
- */
-struct osnoise_sample {
- u64 runtime; /* runtime */
- u64 noise; /* noise */
- u64 max_sample; /* max single noise sample */
- int hw_count; /* # HW (incl. hypervisor) interference */
- int nmi_count; /* # NMIs during this sample */
- int irq_count; /* # IRQs during this sample */
- int softirq_count; /* # softirqs during this sample */
- int thread_count; /* # threads during this sample */
-};
-
-#ifdef CONFIG_TIMERLAT_TRACER
-/*
- * timerlat sample structure definition. Used to store the statistics of
- * a sample run.
- */
-struct timerlat_sample {
- u64 timer_latency; /* timer_latency */
- unsigned int seqnum; /* unique sequence */
- int context; /* timer context */
-};
-#endif
-
-/*
* Tracer data.
*/
static struct osnoise_data {
@@ -497,7 +470,7 @@ static void print_osnoise_headers(struct seq_file *s)
* Record an osnoise_sample into the tracer buffer.
*/
static void
-__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
+__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
struct ring_buffer_event *event;
struct osnoise_entry *entry;
@@ -520,17 +493,19 @@ __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffe
}
/*
- * Record an osnoise_sample on all osnoise instances.
+ * Record an osnoise_sample on all osnoise instances and fire trace event.
*/
-static void trace_osnoise_sample(struct osnoise_sample *sample)
+static void record_osnoise_sample(struct osnoise_sample *sample)
{
struct osnoise_instance *inst;
struct trace_buffer *buffer;
+ trace_osnoise_sample(sample);
+
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
buffer = inst->tr->array_buffer.buffer;
- __trace_osnoise_sample(sample, buffer);
+ __record_osnoise_sample(sample, buffer);
}
rcu_read_unlock();
}
@@ -574,7 +549,7 @@ static void print_timerlat_headers(struct seq_file *s)
#endif /* CONFIG_PREEMPT_RT */
static void
-__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
+__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
struct ring_buffer_event *event;
struct timerlat_entry *entry;
@@ -594,15 +569,17 @@ __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buf
/*
* Record an timerlat_sample into the tracer buffer.
*/
-static void trace_timerlat_sample(struct timerlat_sample *sample)
+static void record_timerlat_sample(struct timerlat_sample *sample)
{
struct osnoise_instance *inst;
struct trace_buffer *buffer;
+ trace_timerlat_sample(sample);
+
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
buffer = inst->tr->array_buffer.buffer;
- __trace_timerlat_sample(sample, buffer);
+ __record_timerlat_sample(sample, buffer);
}
rcu_read_unlock();
}
@@ -1606,7 +1583,7 @@ static int run_osnoise(void)
/* Save interference stats info */
diff_osn_sample_stats(osn_var, &s);
- trace_osnoise_sample(&s);
+ record_osnoise_sample(&s);
notify_new_max_latency(max_noise);
@@ -1801,7 +1778,7 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
s.timer_latency = diff;
s.context = IRQ_CONTEXT;
- trace_timerlat_sample(&s);
+ record_timerlat_sample(&s);
if (osnoise_data.stop_tracing) {
if (time_to_us(diff) >= osnoise_data.stop_tracing) {
@@ -1920,7 +1897,7 @@ static int timerlat_main(void *data)
s.timer_latency = diff;
s.context = THREAD_CONTEXT;
- trace_timerlat_sample(&s);
+ record_timerlat_sample(&s);
notify_new_max_latency(diff);
@@ -2029,7 +2006,6 @@ static int start_kthread(unsigned int cpu)
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
- stop_per_cpu_kthreads();
return -ENOMEM;
}
@@ -2525,7 +2501,7 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
s.timer_latency = diff;
s.context = THREAD_URET;
- trace_timerlat_sample(&s);
+ record_timerlat_sample(&s);
notify_new_max_latency(diff);
@@ -2560,7 +2536,7 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
s.timer_latency = diff;
s.context = THREAD_CONTEXT;
- trace_timerlat_sample(&s);
+ record_timerlat_sample(&s);
if (osnoise_data.stop_tracing_total) {
if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 03d56f711ad1..72b699f909e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -12,15 +12,19 @@
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/idr.h>
+#include <linux/btf.h>
+#include <linux/bpf.h>
+#include <linux/hashtable.h>
#include "trace_output.h"
+#include "trace_btf.h"
-/* must be a power of 2 */
-#define EVENT_HASHSIZE 128
+/* 2^7 = 128 */
+#define EVENT_HASH_BITS 7
DECLARE_RWSEM(trace_event_sem);
-static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
+static DEFINE_HASHTABLE(event_hash, EVENT_HASH_BITS);
enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
{
@@ -684,6 +688,88 @@ int trace_print_lat_context(struct trace_iterator *iter)
return !trace_seq_has_overflowed(s);
}
+#ifdef CONFIG_FUNCTION_TRACE_ARGS
+void print_function_args(struct trace_seq *s, unsigned long *args,
+ unsigned long func)
+{
+ const struct btf_param *param;
+ const struct btf_type *t;
+ const char *param_name;
+ char name[KSYM_NAME_LEN];
+ unsigned long arg;
+ struct btf *btf;
+ s32 tid, nr = 0;
+ int a, p, x;
+
+ trace_seq_printf(s, "(");
+
+ if (!args)
+ goto out;
+ if (lookup_symbol_name(func, name))
+ goto out;
+
+ /* TODO: Pass module name here too */
+ t = btf_find_func_proto(name, &btf);
+ if (IS_ERR_OR_NULL(t))
+ goto out;
+
+ param = btf_get_func_param(t, &nr);
+ if (!param)
+ goto out_put;
+
+ for (a = 0, p = 0; p < nr; a++, p++) {
+ if (p)
+ trace_seq_puts(s, ", ");
+
+ /* This only prints what the arch allows (6 args by default) */
+ if (a == FTRACE_REGS_MAX_ARGS) {
+ trace_seq_puts(s, "...");
+ break;
+ }
+
+ arg = args[a];
+
+ param_name = btf_name_by_offset(btf, param[p].name_off);
+ if (param_name)
+ trace_seq_printf(s, "%s=", param_name);
+ t = btf_type_skip_modifiers(btf, param[p].type, &tid);
+
+ switch (t ? BTF_INFO_KIND(t->info) : BTF_KIND_UNKN) {
+ case BTF_KIND_UNKN:
+ trace_seq_putc(s, '?');
+ /* Still print unknown type values */
+ fallthrough;
+ case BTF_KIND_PTR:
+ trace_seq_printf(s, "0x%lx", arg);
+ break;
+ case BTF_KIND_INT:
+ trace_seq_printf(s, "%ld", arg);
+ break;
+ case BTF_KIND_ENUM:
+ trace_seq_printf(s, "%ld", arg);
+ break;
+ default:
+ /* This does not handle complex arguments */
+ trace_seq_printf(s, "(%s)[0x%lx", btf_type_str(t), arg);
+ for (x = sizeof(long); x < t->size; x += sizeof(long)) {
+ trace_seq_putc(s, ':');
+ if (++a == FTRACE_REGS_MAX_ARGS) {
+ trace_seq_puts(s, "...]");
+ goto out_put;
+ }
+ trace_seq_printf(s, "0x%lx", args[a]);
+ }
+ trace_seq_putc(s, ']');
+ break;
+ }
+ }
+out_put:
+ btf_put(btf);
+out:
+ trace_seq_printf(s, ")");
+}
+#endif
+
/**
* ftrace_find_event - find a registered event
* @type: the type of event to look for
@@ -694,11 +780,8 @@ int trace_print_lat_context(struct trace_iterator *iter)
struct trace_event *ftrace_find_event(int type)
{
struct trace_event *event;
- unsigned key;
-
- key = type & (EVENT_HASHSIZE - 1);
- hlist_for_each_entry(event, &event_hash[key], node) {
+ hash_for_each_possible(event_hash, event, node, type) {
if (event->type == type)
return event;
}
@@ -753,7 +836,6 @@ void trace_event_read_unlock(void)
*/
int register_trace_event(struct trace_event *event)
{
- unsigned key;
int ret = 0;
down_write(&trace_event_sem);
@@ -786,9 +868,7 @@ int register_trace_event(struct trace_event *event)
if (event->funcs->binary == NULL)
event->funcs->binary = trace_nop_print;
- key = event->type & (EVENT_HASHSIZE - 1);
-
- hlist_add_head(&event->node, &event_hash[key]);
+ hash_add(event_hash, &event->node, event->type);
ret = event->type;
out:
@@ -803,7 +883,7 @@ EXPORT_SYMBOL_GPL(register_trace_event);
*/
int __unregister_trace_event(struct trace_event *event)
{
- hlist_del(&event->node);
+ hash_del(&event->node);
free_trace_event_type(event->type);
return 0;
}
@@ -1005,12 +1085,15 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
}
static void print_fn_trace(struct trace_seq *s, unsigned long ip,
- unsigned long parent_ip, long delta, int flags)
+ unsigned long parent_ip, long delta,
+ unsigned long *args, int flags)
{
ip += delta;
parent_ip += delta;
seq_print_ip_sym(s, ip, flags);
+ if (args)
+ print_function_args(s, args, ip);
if ((flags & TRACE_ITER_PRINT_PARENT) && parent_ip) {
trace_seq_puts(s, " <-");
@@ -1024,10 +1107,19 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
{
struct ftrace_entry *field;
struct trace_seq *s = &iter->seq;
+ unsigned long *args;
+ int args_size;
trace_assign_type(field, iter->ent);
- print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags);
+ args_size = iter->ent_size - offsetof(struct ftrace_entry, args);
+ if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long))
+ args = field->args;
+ else
+ args = NULL;
+
+ print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta,
+ args, flags);
trace_seq_putc(s, '\n');
return trace_handle_return(s);
@@ -1700,7 +1792,7 @@ trace_func_repeats_print(struct trace_iterator *iter, int flags,
trace_assign_type(field, iter->ent);
- print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags);
+ print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, NULL, flags);
trace_seq_printf(s, " (repeats: %u, last_ts:", field->count);
trace_print_time(s, iter,
iter->ts - FUNC_REPEATS_GET_DELTA_TS(field));
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index dca40f1f1da4..2e305364f2a9 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -41,5 +41,14 @@ extern struct rw_semaphore trace_event_sem;
#define SEQ_PUT_HEX_FIELD(s, x) \
trace_seq_putmem_hex(s, &(x), sizeof(x))
+#ifdef CONFIG_FUNCTION_TRACE_ARGS
+void print_function_args(struct trace_seq *s, unsigned long *args,
+ unsigned long func);
+#else
+static inline void print_function_args(struct trace_seq *s, unsigned long *args,
+ unsigned long func) {
+ trace_seq_puts(s, "()");
+}
+#endif
#endif
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 8f58ee1e8858..2eeecb6c95ee 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -770,6 +770,10 @@ static int check_prepare_btf_string_fetch(char *typename,
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+/*
+ * Add the entry code to store the 'argnum'th parameter and return the offset
+ * in the entry data buffer where the data will be stored.
+ */
static int __store_entry_arg(struct trace_probe *tp, int argnum)
{
struct probe_entry_arg *earg = tp->entry_arg;
@@ -793,6 +797,20 @@ static int __store_entry_arg(struct trace_probe *tp, int argnum)
tp->entry_arg = earg;
}
+ /*
+ * The entry code array is repeating the pair of
+ * [FETCH_OP_ARG(argnum)][FETCH_OP_ST_EDATA(offset of entry data buffer)]
+ * and the rest of entries are filled with [FETCH_OP_END].
+ *
+ * To reduce the redundant function parameter fetching, we scan the entry
+ * code array to find the FETCH_OP_ARG which already fetches the 'argnum'
+ * parameter. If it doesn't match, update 'offset' to find the last
+ * offset.
+ * If we find the FETCH_OP_END without matching FETCH_OP_ARG entry, we
+ * will save the entry with FETCH_OP_ARG and FETCH_OP_ST_EDATA, and
+ * return data offset so that caller can find the data offset in the entry
+ * data buffer.
+ */
offset = 0;
for (i = 0; i < earg->size - 1; i++) {
switch (earg->code[i].op) {
@@ -826,6 +844,16 @@ int traceprobe_get_entry_data_size(struct trace_probe *tp)
if (!earg)
return 0;
+ /*
+ * earg->code[] array has an operation sequence which is run in
+ * the entry handler.
+ * The sequence stopped by FETCH_OP_END and each data stored in
+ * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA
+ * stores the data at the data buffer + its offset, and all data are
+ * "unsigned long" size. The offset must be increased when a data is
+ * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the
+ * code array.
+ */
for (i = 0; i < earg->size; i++) {
switch (earg->code[i].op) {
case FETCH_OP_END:
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 96792bc4b092..854e5668f5ee 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -545,6 +545,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_BTF_TID, "Failed to get BTF type info."),\
C(BAD_TYPE4STR, "This type does not fit for string."),\
C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\
+ C(TOO_MANY_ARGS, "Too many arguments are specified"), \
C(TOO_MANY_EARGS, "Too many entry arguments specified"),
#undef C
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index af30586f1aea..a0db3404f7f7 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -188,8 +188,6 @@ static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
- else
- iter->private = NULL;
}
static void wakeup_trace_close(struct trace_iterator *iter)
@@ -242,7 +240,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
return;
local_irq_save(flags);
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, fregs);
local_irq_restore(flags);
atomic_dec(&data->disabled);
@@ -327,7 +325,7 @@ __trace_function(struct trace_array *tr,
if (is_graph(tr))
trace_graph_function(tr, ip, parent_ip, trace_ctx);
else
- trace_function(tr, ip, parent_ip, trace_ctx);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
}
static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index ccc762fbb69c..3386439ec9f6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -562,8 +562,14 @@ static int __trace_uprobe_create(int argc, const char **argv)
if (argc < 2)
return -ECANCELED;
- if (argc - 2 > MAX_TRACE_ARGS)
+
+ trace_probe_log_init("trace_uprobe", argc, argv);
+
+ if (argc - 2 > MAX_TRACE_ARGS) {
+ trace_probe_log_set_index(2);
+ trace_probe_log_err(0, TOO_MANY_ARGS);
return -E2BIG;
+ }
if (argv[0][1] == ':')
event = &argv[0][2];
@@ -582,7 +588,6 @@ static int __trace_uprobe_create(int argc, const char **argv)
return -ECANCELED;
}
- trace_probe_log_init("trace_uprobe", argc, argv);
trace_probe_log_set_index(1); /* filename is the 2nd argument */
*arg++ = '\0';
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 1848ce7e2976..62719d2941c9 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -127,7 +127,7 @@ static void debug_print_probes(struct tracepoint_func *funcs)
return;
for (i = 0; funcs[i].func; i++)
- printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
+ printk(KERN_DEBUG "Probe %d : %pSb\n", i, funcs[i].func);
}
static struct tracepoint_func *