diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/filter.c | 28 | ||||
-rw-r--r-- | net/xdp/xsk.c | 4 | ||||
-rw-r--r-- | net/xdp/xsk.h | 4 | ||||
-rw-r--r-- | net/xdp/xskmap.c | 29 |
4 files changed, 49 insertions, 16 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index d062053994c7..d22895caa164 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3897,6 +3897,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { .arg2_type = ARG_ANYTHING, }; +/* XDP_REDIRECT works by a three-step process, implemented in the functions + * below: + * + * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target + * of the redirect and store it (along with some other metadata) in a per-CPU + * struct bpf_redirect_info. + * + * 2. When the program returns the XDP_REDIRECT return code, the driver will + * call xdp_do_redirect() which will use the information in struct + * bpf_redirect_info to actually enqueue the frame into a map type-specific + * bulk queue structure. + * + * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(), + * which will flush all the different bulk queues, thus completing the + * redirect. + * + * Pointers to the map entries will be kept around for this whole sequence of + * steps, protected by RCU. However, there is no top-level rcu_read_lock() in + * the core code; instead, the RCU protection relies on everything happening + * inside a single NAPI poll sequence, which means it's between a pair of calls + * to local_bh_disable()/local_bh_enable(). + * + * The map entries are marked as __rcu and the map code makes sure to + * dereference those pointers with rcu_dereference_check() in a way that works + * for both sections that to hold an rcu_read_lock() and sections that are + * called from NAPI without a separate rcu_read_lock(). The code below does not + * use RCU annotations, but relies on those in the map code. + */ void xdp_do_flush(void) { __dev_flush(); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cd62d4ba87a9..996da915f520 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -749,7 +749,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs) } static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs, - struct xdp_sock ***map_entry) + struct xdp_sock __rcu ***map_entry) { struct xsk_map *map = NULL; struct xsk_map_node *node; @@ -785,7 +785,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs) * might be updates to the map between * xsk_get_map_list_entry() and xsk_map_try_sock_delete(). */ - struct xdp_sock **map_entry = NULL; + struct xdp_sock __rcu **map_entry = NULL; struct xsk_map *map; while ((map = xsk_get_map_list_entry(xs, &map_entry))) { diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index edcf249ad1f1..a4bc4749faac 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -31,7 +31,7 @@ struct xdp_mmap_offsets_v1 { struct xsk_map_node { struct list_head node; struct xsk_map *map; - struct xdp_sock **map_entry; + struct xdp_sock __rcu **map_entry; }; static inline struct xdp_sock *xdp_sk(struct sock *sk) @@ -40,7 +40,7 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk) } void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, - struct xdp_sock **map_entry); + struct xdp_sock __rcu **map_entry); void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id); int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, u16 queue_id); diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 9df75ea4a567..2e48d0e094d9 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -12,7 +12,7 @@ #include "xsk.h" static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, - struct xdp_sock **map_entry) + struct xdp_sock __rcu **map_entry) { struct xsk_map_node *node; @@ -42,7 +42,7 @@ static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) } static void xsk_map_sock_delete(struct xdp_sock *xs, - struct xdp_sock **map_entry) + struct xdp_sock __rcu **map_entry) { struct xsk_map_node *n, *tmp; @@ -124,6 +124,10 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) return insn - insn_buf; } +/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or + * by local_bh_disable() (from XDP calls inside NAPI). The + * rcu_read_lock_bh_held() below makes lockdep accept both. + */ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) { struct xsk_map *m = container_of(map, struct xsk_map, map); @@ -131,12 +135,11 @@ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) if (key >= map->max_entries) return NULL; - return READ_ONCE(m->xsk_map[key]); + return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held()); } static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) { - WARN_ON_ONCE(!rcu_read_lock_held()); return __xsk_map_lookup_elem(map, *(u32 *)key); } @@ -149,7 +152,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); - struct xdp_sock *xs, *old_xs, **map_entry; + struct xdp_sock __rcu **map_entry; + struct xdp_sock *xs, *old_xs; u32 i = *(u32 *)key, fd = *(u32 *)value; struct xsk_map_node *node; struct socket *sock; @@ -179,7 +183,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, } spin_lock_bh(&m->lock); - old_xs = READ_ONCE(*map_entry); + old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock)); if (old_xs == xs) { err = 0; goto out; @@ -191,7 +195,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, goto out; } xsk_map_sock_add(xs, node); - WRITE_ONCE(*map_entry, xs); + rcu_assign_pointer(*map_entry, xs); if (old_xs) xsk_map_sock_delete(old_xs, map_entry); spin_unlock_bh(&m->lock); @@ -208,7 +212,8 @@ out: static int xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); - struct xdp_sock *old_xs, **map_entry; + struct xdp_sock __rcu **map_entry; + struct xdp_sock *old_xs; int k = *(u32 *)key; if (k >= map->max_entries) @@ -216,7 +221,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) spin_lock_bh(&m->lock); map_entry = &m->xsk_map[k]; - old_xs = xchg(map_entry, NULL); + old_xs = unrcu_pointer(xchg(map_entry, NULL)); if (old_xs) xsk_map_sock_delete(old_xs, map_entry); spin_unlock_bh(&m->lock); @@ -231,11 +236,11 @@ static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) } void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, - struct xdp_sock **map_entry) + struct xdp_sock __rcu **map_entry) { spin_lock_bh(&map->lock); - if (READ_ONCE(*map_entry) == xs) { - WRITE_ONCE(*map_entry, NULL); + if (rcu_access_pointer(*map_entry) == xs) { + rcu_assign_pointer(*map_entry, NULL); xsk_map_sock_delete(xs, map_entry); } spin_unlock_bh(&map->lock); |