summaryrefslogtreecommitdiff
path: root/kernel/bpf/cpumap.c
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2017-10-16 12:19:44 +0200
committerDavid S. Miller <davem@davemloft.net>2017-10-18 12:12:18 +0100
commitf9419f7bd7a5318b636a941a0214c5cdfa6f6530 (patch)
tree20eed724e856cfae290ed8b3730c6b2a9af819ea /kernel/bpf/cpumap.c
parent1c601d829ab0d7ac3ac44853f83db2206afe67fc (diff)
downloadlwn-f9419f7bd7a5318b636a941a0214c5cdfa6f6530.tar.gz
lwn-f9419f7bd7a5318b636a941a0214c5cdfa6f6530.zip
bpf: cpumap add tracepoints
This adds two tracepoint to the cpumap. One for the enqueue side trace_xdp_cpumap_enqueue() and one for the kthread dequeue side trace_xdp_cpumap_kthread(). To mitigate the tracepoint overhead, these are invoked during the enqueue/dequeue bulking phases, thus amortizing the cost. The obvious use-cases are for debugging and monitoring. The non-intuitive use-case is using these as a feedback loop to know the system load. One can imagine auto-scaling by reducing, adding or activating more worker CPUs on demand. V4: tracepoint remove time_limit info, instead add sched info V8: intro struct bpf_cpu_map_entry members cpu+map_id in this patch Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/cpumap.c')
-rw-r--r--kernel/bpf/cpumap.c24
1 files changed, 19 insertions, 5 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ee7adf4352dd..b4358d84ddf1 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -24,6 +24,7 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/capability.h>
+#include <trace/events/xdp.h>
#include <linux/netdevice.h> /* netif_receive_skb_core */
#include <linux/etherdevice.h> /* eth_type_trans */
@@ -43,6 +44,8 @@ struct xdp_bulk_queue {
/* Struct for every remote "destination" CPU in map */
struct bpf_cpu_map_entry {
+ u32 cpu; /* kthread CPU and map index */
+ int map_id; /* Back reference to map */
u32 qsize; /* Queue size placeholder for map lookup */
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
@@ -280,15 +283,16 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
- unsigned int processed = 0, drops = 0;
+ unsigned int processed = 0, drops = 0, sched = 0;
struct xdp_pkt *xdp_pkt;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
__set_current_state(TASK_INTERRUPTIBLE);
schedule();
+ sched = 1;
} else {
- cond_resched();
+ sched = cond_resched();
}
__set_current_state(TASK_RUNNING);
@@ -318,6 +322,9 @@ static int cpu_map_kthread_run(void *data)
if (++processed == 8)
break;
}
+ /* Feedback loop via tracepoint */
+ trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+
local_bh_enable(); /* resched point, may call do_softirq() */
}
__set_current_state(TASK_RUNNING);
@@ -354,7 +361,9 @@ struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
if (err)
goto free_queue;
- rcpu->qsize = qsize;
+ rcpu->cpu = cpu;
+ rcpu->map_id = map_id;
+ rcpu->qsize = qsize;
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
@@ -584,6 +593,8 @@ const struct bpf_map_ops cpu_map_ops = {
static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
struct xdp_bulk_queue *bq)
{
+ unsigned int processed = 0, drops = 0;
+ const int to_cpu = rcpu->cpu;
struct ptr_ring *q;
int i;
@@ -599,13 +610,16 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdp_pkt);
if (err) {
- /* Free xdp_pkt */
- page_frag_free(xdp_pkt);
+ drops++;
+ page_frag_free(xdp_pkt); /* Free xdp_pkt */
}
+ processed++;
}
bq->count = 0;
spin_unlock(&q->producer_lock);
+ /* Feedback loop via tracepoints */
+ trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
return 0;
}