From d8616ee2affcff37c5d315310da557a694a3303d Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 24 May 2022 15:53:11 +0800 Subject: bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues During TCP sockmap redirect pressure test, the following warning is triggered: WARNING: CPU: 3 PID: 2145 at net/core/stream.c:205 sk_stream_kill_queues+0xbc/0xd0 CPU: 3 PID: 2145 Comm: iperf Kdump: loaded Tainted: G W 5.10.0+ #9 Call Trace: inet_csk_destroy_sock+0x55/0x110 inet_csk_listen_stop+0xbb/0x380 tcp_close+0x41b/0x480 inet_release+0x42/0x80 __sock_release+0x3d/0xa0 sock_close+0x11/0x20 __fput+0x9d/0x240 task_work_run+0x62/0x90 exit_to_user_mode_prepare+0x110/0x120 syscall_exit_to_user_mode+0x27/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The reason we observed is that: When the listener is closing, a connection may have completed the three-way handshake but not accepted, and the client has sent some packets. The child sks in accept queue release by inet_child_forget()->inet_csk_destroy_sock(), but psocks of child sks have not released. To fix, add sock_map_destroy to release psocks. Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220524075311.649153-1-wangyufen@huawei.com --- net/core/skmsg.c | 1 + net/core/sock_map.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'net/core') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 22b983ade0e7..7e03f96e441b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -715,6 +715,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) psock->eval = __SK_NONE; psock->sk_proto = prot; psock->saved_unhash = prot->unhash; + psock->saved_destroy = prot->destroy; psock->saved_close = prot->close; psock->saved_write_space = sk->sk_write_space; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 81d4b4756a02..9f08ccfaf6da 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1561,6 +1561,29 @@ void sock_map_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(sock_map_unhash); +void sock_map_destroy(struct sock *sk) +{ + void (*saved_destroy)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock_get(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->destroy) + sk->sk_prot->destroy(sk); + return; + } + + saved_destroy = psock->saved_destroy; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock, true); + sk_psock_put(sk, psock); + saved_destroy(sk); +} +EXPORT_SYMBOL_GPL(sock_map_destroy); + void sock_map_close(struct sock *sk, long timeout) { void (*saved_close)(struct sock *sk, long timeout); -- cgit v1.2.3 From ec43908dd556b2292f028c6e412261689405ba6e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:35 +0800 Subject: net: skb: use auto-generation to convert skb drop reason to string It is annoying to add new skb drop reasons to 'enum skb_drop_reason' and TRACE_SKB_DROP_REASON in trace/event/skb.h, and it's easy to forget to add the new reasons we added to TRACE_SKB_DROP_REASON. TRACE_SKB_DROP_REASON is used to convert drop reason of type number to string. For now, the string we passed to user space is exactly the same as the name in 'enum skb_drop_reason' with a 'SKB_DROP_REASON_' prefix. Therefore, we can use 'auto-generation' to generate these drop reasons to string at build time. The new source 'dropreason_str.c' will be auto generated during build time, which contains the string array 'const char * const drop_reasons[]'. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason.h | 2 ++ include/trace/events/skb.h | 89 +--------------------------------------------- net/core/.gitignore | 1 + net/core/Makefile | 23 +++++++++++- net/core/drop_monitor.c | 13 ------- net/core/skbuff.c | 3 ++ 6 files changed, 29 insertions(+), 102 deletions(-) create mode 100644 net/core/.gitignore (limited to 'net/core') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index ecd18b7b1364..013ff0f2543e 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -181,4 +181,6 @@ enum skb_drop_reason { SKB_DR_SET(name, reason); \ } while (0) +extern const char * const drop_reasons[]; + #endif diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a477bf907498..45264e4bb254 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,92 +9,6 @@ #include #include -#define TRACE_SKB_DROP_REASON \ - EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ - EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ - EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ - EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ - EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ - EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ - EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ - EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ - EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ - EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ - EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ - UNICAST_IN_L2_MULTICAST) \ - EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ - EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ - EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ - EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ - EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ - EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ - TCP_MD5UNEXPECTED) \ - EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ - EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ - EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ - EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ - EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ - EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ - EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ - EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ - EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ - EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ - TCP_INVALID_SEQUENCE) \ - EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ - EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ - EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ - EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ - EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ - TCP_ACK_UNSENT_DATA) \ - EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ - TCP_OFO_QUEUE_PRUNE) \ - EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ - EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ - BPF_CGROUP_EGRESS) \ - EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ - EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ - EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ - EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ - EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ - EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ - EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ - EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ - EM(SKB_DROP_REASON_XDP, XDP) \ - EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ - EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ - EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ - EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ - EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ - EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ - EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ - EM(SKB_DROP_REASON_NOMEM, NOMEM) \ - EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ - EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ - EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ - EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ - EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ - EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ - EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ - EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ - EMe(SKB_DROP_REASON_MAX, MAX) - -#undef EM -#undef EMe - -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -TRACE_SKB_DROP_REASON - -#undef EM -#undef EMe -#define EM(a, b) { a, #b }, -#define EMe(a, b) { a, #b } - /* * Tracepoint for free an sk_buff: */ @@ -121,8 +35,7 @@ TRACE_EVENT(kfree_skb, TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", __entry->skbaddr, __entry->protocol, __entry->location, - __print_symbolic(__entry->reason, - TRACE_SKB_DROP_REASON)) + drop_reasons[__entry->reason]) ); TRACE_EVENT(consume_skb, diff --git a/net/core/.gitignore b/net/core/.gitignore new file mode 100644 index 000000000000..df1e74372cce --- /dev/null +++ b/net/core/.gitignore @@ -0,0 +1 @@ +dropreason_str.c diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f737692b..e8ce3bd283a6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -4,7 +4,8 @@ # obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ + flow_dissector.o dropreason_str.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -39,3 +40,23 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o + +clean-files := dropreason_str.c + +quiet_cmd_dropreason_str = GEN $@ +cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include \n"; \ + print "const char * const drop_reasons[] = {" }\ + /^enum skb_drop/ { dr=1; }\ + /^\};/ { dr=0; }\ + /^\tSKB_DROP_REASON_/ {\ + if (dr) {\ + sub(/\tSKB_DROP_REASON_/, "", $$1);\ + printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\ + }\ + }\ + END{ print "};" }' $< > $@ + +$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h + $(call cmd,dropreason_str) + +$(obj)/dropreason_str.o: $(obj)/dropreason_str.c diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 41cac0e4834e..4ad1decce724 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,19 +48,6 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; -#undef EM -#undef EMe - -#define EM(a, b) [a] = #b, -#define EMe(a, b) [a] = #b - -/* drop_reasons is used to translate 'enum skb_drop_reason' to string, - * which is reported to user space. - */ -static const char * const drop_reasons[] = { - TRACE_SKB_DROP_REASON -}; - /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b3559cb1d82..b661040c100e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,6 +91,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* The array 'drop_reasons' is auto-generated in dropreason_str.c */ +EXPORT_SYMBOL(drop_reasons); + /** * skb_panic - private function for out-of-line support * @skb: buffer -- cgit v1.2.3 From d62607c3fe45911b2331fac073355a8c914bbde2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jun 2022 21:39:55 -0700 Subject: net: rename reference+tracking helpers Netdev reference helpers have a dev_ prefix for historic reasons. Renaming the old helpers would be too much churn but we can rename the tracking ones which are relatively recent and should be the default for new code. Rename: dev_hold_track() -> netdev_hold() dev_put_track() -> netdev_put() dev_replace_track() -> netdev_ref_replace() Link: https://lore.kernel.org/r/20220608043955.919359-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/eql.c | 4 ++-- drivers/net/macsec.c | 4 ++-- drivers/net/macvlan.c | 4 ++-- drivers/net/netconsole.c | 2 +- drivers/net/vrf.c | 8 ++++---- include/linux/netdevice.h | 24 ++++++++++++------------ include/net/xfrm.h | 2 +- net/8021q/vlan_dev.c | 4 ++-- net/ax25/af_ax25.c | 7 ++++--- net/ax25/ax25_dev.c | 6 +++--- net/bridge/br_if.c | 10 +++++----- net/core/dev.c | 10 +++++----- net/core/dev_ioctl.c | 4 ++-- net/core/drop_monitor.c | 5 +++-- net/core/dst.c | 8 ++++---- net/core/failover.c | 4 ++-- net/core/link_watch.c | 2 +- net/core/neighbour.c | 18 +++++++++--------- net/core/net-sysfs.c | 8 ++++---- net/core/netpoll.c | 2 +- net/core/pktgen.c | 6 +++--- net/ethtool/ioctl.c | 4 ++-- net/ethtool/netlink.c | 6 +++--- net/ethtool/netlink.h | 2 +- net/ipv4/devinet.c | 4 ++-- net/ipv4/fib_semantics.c | 11 ++++++----- net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 7 +++---- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/addrconf_core.c | 2 +- net/ipv6/ip6_gre.c | 8 ++++---- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/ip6_vti.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- net/ipv6/route.c | 10 +++++----- net/ipv6/sit.c | 4 ++-- net/ipv6/xfrm6_policy.c | 4 ++-- net/llc/af_llc.c | 2 +- net/openvswitch/vport-netdev.c | 6 +++--- net/packet/af_packet.c | 12 ++++++------ net/sched/act_mirred.c | 6 +++--- net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 11 ++++++----- net/smc/smc_pnet.c | 7 ++++--- net/switchdev/switchdev.c | 4 ++-- net/tipc/bearer.c | 4 ++-- net/xfrm/xfrm_device.c | 2 +- 48 files changed, 141 insertions(+), 137 deletions(-) (limited to 'net/core') diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 557ca8ff9dec..ca3e4700a813 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -225,7 +225,7 @@ static void eql_kill_one_slave(slave_queue_t *queue, slave_t *slave) list_del(&slave->list); queue->num_slaves--; slave->dev->flags &= ~IFF_SLAVE; - dev_put_track(slave->dev, &slave->dev_tracker); + netdev_put(slave->dev, &slave->dev_tracker); kfree(slave); } @@ -399,7 +399,7 @@ static int __eql_insert_slave(slave_queue_t *queue, slave_t *slave) if (duplicate_slave) eql_kill_one_slave(queue, duplicate_slave); - dev_hold_track(slave->dev, &slave->dev_tracker, GFP_ATOMIC); + netdev_hold(slave->dev, &slave->dev_tracker, GFP_ATOMIC); list_add(&slave->list, &queue->all_slaves); queue->num_slaves++; slave->dev->flags |= IFF_SLAVE; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 817577e713d7..815738c0e067 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3462,7 +3462,7 @@ static int macsec_dev_init(struct net_device *dev) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); /* Get macsec's reference to real_dev */ - dev_hold_track(real_dev, &macsec->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &macsec->dev_tracker, GFP_KERNEL); return 0; } @@ -3710,7 +3710,7 @@ static void macsec_free_netdev(struct net_device *dev) free_percpu(macsec->secy.tx_sc.stats); /* Get rid of the macsec's reference to real_dev */ - dev_put_track(macsec->real_dev, &macsec->dev_tracker); + netdev_put(macsec->real_dev, &macsec->dev_tracker); } static void macsec_setup(struct net_device *dev) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index eff75beb1395..5b46a6526fc6 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -915,7 +915,7 @@ static int macvlan_init(struct net_device *dev) port->count += 1; /* Get macvlan's reference to lowerdev */ - dev_hold_track(lowerdev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(lowerdev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -1185,7 +1185,7 @@ static void macvlan_dev_free(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); /* Get rid of the macvlan's reference to lowerdev */ - dev_put_track(vlan->lowerdev, &vlan->dev_tracker); + netdev_put(vlan->lowerdev, &vlan->dev_tracker); } void macvlan_common_setup(struct net_device *dev) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index ab8cd5551020..ddac61d79145 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -721,7 +721,7 @@ restart: __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); - dev_put_track(nt->np.dev, &nt->np.dev_tracker); + netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; stopped = true; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cfc30ce4c6e1..40445a12c682 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -814,8 +814,8 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) */ if (rt6) { dst = &rt6->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } @@ -1061,8 +1061,8 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) */ if (rth) { dst = &rth->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..e2e5088888b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3981,8 +3981,8 @@ static inline void netdev_tracker_free(struct net_device *dev, #endif } -static inline void dev_hold_track(struct net_device *dev, - netdevice_tracker *tracker, gfp_t gfp) +static inline void netdev_hold(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); @@ -3990,8 +3990,8 @@ static inline void dev_hold_track(struct net_device *dev, } } -static inline void dev_put_track(struct net_device *dev, - netdevice_tracker *tracker) +static inline void netdev_put(struct net_device *dev, + netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); @@ -4004,11 +4004,11 @@ static inline void dev_put_track(struct net_device *dev, * @dev: network device * * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. + * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { - dev_hold_track(dev, NULL, GFP_ATOMIC); + netdev_hold(dev, NULL, GFP_ATOMIC); } /** @@ -4016,17 +4016,17 @@ static inline void dev_hold(struct net_device *dev) * @dev: network device * * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. + * Try using netdev_put() instead. */ static inline void dev_put(struct net_device *dev) { - dev_put_track(dev, NULL); + netdev_put(dev, NULL); } -static inline void dev_replace_track(struct net_device *odev, - struct net_device *ndev, - netdevice_tracker *tracker, - gfp_t gfp) +static inline void netdev_ref_replace(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c39d910d4b45..9287712ad977 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1923,7 +1923,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); } } #else diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 839f2020b015..e3dff2df6f54 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -615,7 +615,7 @@ static int vlan_dev_init(struct net_device *dev) return -ENOMEM; /* Get vlan's reference to real_dev */ - dev_hold_track(real_dev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -852,7 +852,7 @@ static void vlan_dev_free(struct net_device *dev) vlan->vlan_pcpu_stats = NULL; /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); + netdev_put(vlan->real_dev, &vlan->dev_tracker); } void vlan_setup(struct net_device *dev) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 95393bb2760b..1a5c0b071aa3 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,7 +102,8 @@ again: ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; if (sk->sk_socket) { - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, + &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); @@ -1065,7 +1066,7 @@ static int ax25_release(struct socket *sock) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -1146,7 +1147,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); } done: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 95a76d571c44..ab88b6ac5401 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -60,7 +60,7 @@ void ax25_dev_device_up(struct net_device *dev) refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; - dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); ax25_dev->forward = NULL; ax25_dev->device_up = true; @@ -136,7 +136,7 @@ unlock_put: spin_unlock_bh(&ax25_dev_lock); ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); + netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -205,7 +205,7 @@ void __exit ax25_dev_free(void) ax25_dev = ax25_dev_list; while (ax25_dev != NULL) { s = ax25_dev; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev = ax25_dev->next; kfree(s); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 47fcbade7389..a84a7cfb9d6d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -274,7 +274,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->br = NULL; p->dev = NULL; - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); } @@ -423,7 +423,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return ERR_PTR(-ENOMEM); p->br = br; - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; @@ -434,7 +434,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_stp_port_timer_init(p); err = br_multicast_add_port(p); if (err) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); p = ERR_PTR(err); } @@ -615,7 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -725,7 +725,7 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: diff --git a/net/core/dev.c b/net/core/dev.c index 08ce317fcec8..c9d96b85a825 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7463,7 +7463,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->ref_nr = 1; adj->private = private; adj->ignore = false; - dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL); + netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); @@ -7492,7 +7492,7 @@ remove_symlinks: if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree(adj); return ret; @@ -7534,7 +7534,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree_rcu(adj, rcu); } @@ -10062,7 +10062,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); - dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); + netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL); list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -10868,7 +10868,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put_track(dev, &dev->dev_registered_tracker); + netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4f6be442ae7e..7674bb9f3076 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -384,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); rtnl_lock(); return err; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 4ad1decce724..804d02fc245f 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -864,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); + netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker, + GFP_ATOMIC); return hw_metadata; @@ -880,7 +881,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) { - dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); + netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index d16c2c9bfebd..bc9c9be4e080 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - dev_put_track(dst->dev, &dst->dev_tracker); + netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); @@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst) dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); } EXPORT_SYMBOL(dst_dev_put); diff --git a/net/core/failover.c b/net/core/failover.c index dcaa92a85ea2..864d2d83eff4 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev, return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); - dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); @@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover) failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put_track(failover_dev, &failover->dev_tracker); + netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a244d3bade7d..aa6cb1f90966 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -110,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev) spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); - dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 54625287ee5b..d8ec70622ecb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -624,7 +624,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC); /* Protocol specific setup. */ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { @@ -770,10 +770,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &n->dev_tracker, GFP_KERNEL); if (tbl->pconstructor && tbl->pconstructor(n)) { - dev_put_track(dev, &n->dev_tracker); + netdev_put(dev, &n->dev_tracker); kfree(n); n = NULL; goto out; @@ -805,7 +805,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); return 0; } @@ -838,7 +838,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); } return -ENOENT; @@ -879,7 +879,7 @@ void neigh_destroy(struct neighbour *neigh) if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); - dev_put_track(dev, &neigh->dev_tracker); + netdev_put(dev, &neigh->dev_tracker); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); @@ -1671,13 +1671,13 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); return NULL; } @@ -1708,7 +1708,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - dev_put_track(parms->dev, &parms->dev_tracker); + netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e319e242dddf..d49fc974e630 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1016,7 +1016,7 @@ static void rx_queue_release(struct kobject *kobj) #endif memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *rx_queue_namespace(struct kobject *kobj) @@ -1056,7 +1056,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, @@ -1619,7 +1619,7 @@ static void netdev_queue_release(struct kobject *kobj) struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *netdev_queue_namespace(struct kobject *kobj) @@ -1659,7 +1659,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index db724463e7cd..5d27067b72d5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np) if (!np->dev) goto out; __netpoll_cleanup(np); - dev_put_track(np->dev, &np->dev_tracker); + netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; out: rtnl_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 84b62cd7bc57..88906ba6d9a7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } @@ -3807,7 +3807,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); out2: - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3901,7 +3901,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 326e14ee05db..d05ff6a17a1f 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2010,7 +2010,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) * removal of the device. */ busy = true; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); if (rc == 0) { @@ -2034,7 +2034,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); busy = false; (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5fe8f4ae2ceb..e26079e11835 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -402,7 +402,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -414,7 +414,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return ret; @@ -550,7 +550,7 @@ static int ethnl_default_start(struct netlink_callback *cb) * same parser as for non-dump (doit) requests is used, it * would take reference to the device if it finds one */ - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } if (ret < 0) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 7919ddb2371c..c0d587611854 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -237,7 +237,7 @@ struct ethnl_req_info { static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) { - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); } /** diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b2366ad540e6..92b778e423df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -244,7 +244,7 @@ void in_dev_finish_destroy(struct in_device *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else @@ -272,7 +272,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); /* Reference in_dev->dev */ - dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..a5439a8414d4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -211,7 +211,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - dev_put_track(nhc->nhc_dev, &nhc->nhc_dev_tracker); + netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -1057,7 +1057,8 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, + GFP_KERNEL); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; @@ -1141,7 +1142,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; nh->fib_nh_dev = dev; - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } @@ -1195,7 +1196,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, "No egress device for nexthop gateway"); goto out; } - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; @@ -1229,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, } nh->fib_nh_dev = in_dev->dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 13e6329784fb..8324e541d193 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -691,7 +691,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + netdev_put(dev, &v->dev_tracker); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 356f535f3443..2d16bcc7d346 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1550,9 +1550,8 @@ void rt_flush_dev(struct net_device *dev) if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, + &rt->dst.dev_tracker, GFP_ATOMIC); list_move(&rt->rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); @@ -2851,7 +2850,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - dev_hold_track(new->dev, &new->dev_tracker, GFP_ATOMIC); + netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6fde0b184791..3d0dfa6cf9f9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -75,7 +75,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..3497ad1362c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -398,13 +398,13 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (ndev->cnf.forwarding) dev_disable_lro(dev); /* We refer to the device */ - dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); if (snmp6_alloc_dev(ndev) < 0) { netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", __func__); neigh_parms_release(&nd_tbl, ndev->nd_parms); - dev_put_track(dev, &ndev->dev_tracker); + netdev_put(dev, &ndev->dev_tracker); kfree(ndev); return ERR_PTR(err); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 881d1477d24a..507a8353a6bd 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -263,7 +263,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) { pr_warn("Freeing alive inet6 device %p\n", idev); return; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4e37f7c29900..3e22cbe5966a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -398,7 +398,7 @@ static void ip6erspan_tunnel_uninit(struct net_device *dev) ip6erspan_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static void ip6gre_tunnel_uninit(struct net_device *dev) @@ -411,7 +411,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) if (ign->fb_tunnel_dev == dev) WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } @@ -1495,7 +1495,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: @@ -1887,7 +1887,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 19325b7600bb..689de5eb604e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -381,7 +381,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) else ip6_tnl_unlink(ip6n, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } /** @@ -1889,7 +1889,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; destroy_dst: diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3a434d75925c..8fe59a79e800 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -293,7 +293,7 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -936,7 +936,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4e74bc61a3db..d4aad41c9849 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -741,7 +741,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + netdev_put(dev, &v->dev_tracker); return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..0be01a4d48c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -182,9 +182,9 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) if (rt_dev == dev) { rt->dst.dev = blackhole_netdev; - dev_replace_track(rt_dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(rt_dev, blackhole_netdev, + &rt->dst.dev_tracker, + GFP_ATOMIC); handled = true; } if (handled) @@ -607,7 +607,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); - dev_put_track(work->dev, &work->dev_tracker); + netdev_put(work->dev, &work->dev_tracker); kfree(work); } @@ -661,7 +661,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; - dev_hold_track(dev, &work->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC); work->dev = dev; schedule_work(&work->work); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c20992..4f1721865fda 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -521,7 +521,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); - dev_put_track(dev, &tunnel->dev_tracker); + netdev_put(dev, &tunnel->dev_tracker); } static int ipip6_err(struct sk_buff *skb, u32 info) @@ -1463,7 +1463,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e64e427a51cf..4a4b0e49ec92 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,11 +73,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) { - dev_put_track(dev, &xdst->u.dst.dev_tracker); + netdev_put(dev, &xdst->u.dst.dev_tracker); return -ENODEV; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7f555d2e5357..da7fe94bea2e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -224,7 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - dev_put_track(llc->dev, &llc->dev_tracker); + netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); llc_sk_free(sk); out: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b498dac4e1e0..2f61d5bdce1a 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -115,7 +115,7 @@ error_master_upper_dev_unlink: error_unlock: rtnl_unlock(); error_put: - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -137,7 +137,7 @@ static void vport_netdev_free(struct rcu_head *rcu) { struct vport *vport = container_of(rcu, struct vport, rcu); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); ovs_vport_free(vport); } @@ -173,7 +173,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); vport->dev = NULL; rtnl_unlock(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ca6e92a22923..d08c4728523b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3134,7 +3134,7 @@ static int packet_release(struct socket *sock) packet_cached_dev_reset(po); if (po->prot_hook.dev) { - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -3235,15 +3235,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { - dev_hold_track(dev, &po->prot_hook.dev_tracker, - GFP_ATOMIC); + netdev_hold(dev, &po->prot_hook.dev_tracker, + GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); @@ -4167,8 +4167,8 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - dev_put_track(po->prot_hook.dev, - &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, + &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index ebb92fb072ab..a1d70cf86843 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -79,7 +79,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -181,7 +181,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(ndev); odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); - dev_put_track(odev, &m->tcfm_dev_tracker); + netdev_put(odev, &m->tcfm_dev_tracker); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; } @@ -402,7 +402,7 @@ static int mirred_device_event(struct notifier_block *unused, list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3c0e8ea2dbb..bf87b50837a8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1292,7 +1292,7 @@ err_out5: if (ops->destroy) ops->destroy(sch); err_out3: - dev_put_track(dev, &sch->dev_tracker); + netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: module_put(ops->owner); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dba0b3e24af5..cc6eabee2830 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -541,7 +541,7 @@ static void dev_watchdog(struct timer_list *t) spin_unlock(&dev->tx_global_lock); if (release) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); } void __netdev_watchdog_up(struct net_device *dev) @@ -551,7 +551,8 @@ void __netdev_watchdog_up(struct net_device *dev) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) - dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->watchdog_dev_tracker, + GFP_ATOMIC); } } EXPORT_SYMBOL_GPL(__netdev_watchdog_up); @@ -565,7 +566,7 @@ static void dev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } @@ -975,7 +976,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); return sch; @@ -1067,7 +1068,7 @@ static void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7055ed10e316..4c3bf6db7038 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -120,7 +120,8 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, + &pnetelem->dev_tracker); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", @@ -196,7 +197,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { - dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); + netdev_hold(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " @@ -227,7 +228,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, &pnetelem->dev_tracker); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 474f76383033..8cc42aea19c7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -64,7 +64,7 @@ void switchdev_deferred_process(void) while ((dfitem = switchdev_deferred_dequeue())) { dfitem->func(dfitem->dev, dfitem->data); - dev_put_track(dfitem->dev, &dfitem->dev_tracker); + netdev_put(dfitem->dev, &dfitem->dev_tracker); kfree(dfitem); } } @@ -91,7 +91,7 @@ static int switchdev_deferred_enqueue(struct net_device *dev, dfitem->dev = dev; dfitem->func = func; memcpy(dfitem->data, data, data_len); - dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dfitem->dev_tracker, GFP_ATOMIC); spin_lock_bh(&deferred_lock); list_add_tail(&dfitem->list, &deferred); spin_unlock_bh(&deferred_lock); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 932c87b98eca..35cac7733fd3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -788,7 +788,7 @@ int tipc_attach_loopback(struct net *net) if (!dev) return -ENODEV; - dev_hold_track(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; @@ -801,7 +801,7 @@ void tipc_detach_loopback(struct net *net) struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); - dev_put_track(net->loopback_dev, &tn->loopback_pt.dev_tracker); + netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 35c7e89b2e7d..637ca8838436 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -275,7 +275,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = NULL; xso->dir = 0; xso->real_dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); if (err != -EOPNOTSUPP) return err; -- cgit v1.2.3 From 9962acefbcb92736c268aafe5f52200948f60f3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:37 -0700 Subject: net: adopt u64_stats_t in struct pcpu_sw_netstats As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 ++++---- drivers/net/usb/usbnet.c | 8 ++++---- drivers/net/vxlan/vxlan_core.c | 8 ++++---- include/linux/netdevice.h | 16 ++++++++-------- include/net/ip_tunnels.h | 4 ++-- net/bridge/br_netlink.c | 8 ++++---- net/bridge/br_vlan.c | 36 ++++++++++++++++++++---------------- net/core/dev.c | 18 +++++++++--------- net/dsa/slave.c | 8 ++++---- 9 files changed, 59 insertions(+), 55 deletions(-) (limited to 'net/core') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 815738c0e067..c881e1bf6f6e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -523,8 +523,8 @@ static void count_tx(struct net_device *dev, int ret, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += len; + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, len); u64_stats_update_end(&stats->syncp); } } @@ -825,8 +825,8 @@ static void count_rx(struct net_device *dev, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += len; + u64_stats_inc(&stats->rx_packets); + u64_stats_add(&stats->rx_bytes, len); u64_stats_update_end(&stats->syncp); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1cb6dab3e2d0..dc79811535c2 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -337,8 +337,8 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) skb->protocol = eth_type_trans (skb, dev->net); flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->rx_packets++; - stats64->rx_bytes += skb->len; + u64_stats_inc(&stats64->rx_packets); + u64_stats_add(&stats64->rx_bytes, skb->len); u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", @@ -1258,8 +1258,8 @@ static void tx_complete (struct urb *urb) unsigned long flags; flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->tx_packets += entry->packets; - stats64->tx_bytes += entry->length; + u64_stats_add(&stats64->tx_packets, entry->packets); + u64_stats_add(&stats64->tx_bytes, entry->length); u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 265d4a0245e7..8b0710b576c2 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2385,15 +2385,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); - tx_stats->tx_packets++; - tx_stats->tx_bytes += len; + u64_stats_inc(&tx_stats->tx_packets); + u64_stats_add(&tx_stats->tx_bytes, len); u64_stats_update_end(&tx_stats->syncp); vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len); if (__netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += len; + u64_stats_inc(&rx_stats->rx_packets); + u64_stats_add(&rx_stats->rx_bytes, len); u64_stats_update_end(&rx_stats->syncp); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX, len); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2e5088888b1..89afa4f7747d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2636,10 +2636,10 @@ struct packet_offload { /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); @@ -2656,8 +2656,8 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->rx_bytes += len; - tstats->rx_packets++; + u64_stats_add(&tstats->rx_bytes, len); + u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } @@ -2668,8 +2668,8 @@ static inline void dev_sw_netstats_tx_add(struct net_device *dev, struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += len; - tstats->tx_packets += packets; + u64_stats_add(&tstats->tx_bytes, len); + u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c24fa934221d..70cbc4a72669 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -456,8 +456,8 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); } else { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index bb01776d2d88..1ef14a099c6b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1770,10 +1770,10 @@ static int br_fill_linkxstats(struct sk_buff *skb, if (v->vid == pvid) vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); - vxi.rx_bytes = stats.rx_bytes; - vxi.rx_packets = stats.rx_packets; - vxi.tx_bytes = stats.tx_bytes; - vxi.tx_packets = stats.tx_packets; + vxi.rx_bytes = u64_stats_read(&stats.rx_bytes); + vxi.rx_packets = u64_stats_read(&stats.rx_packets); + vxi.tx_bytes = u64_stats_read(&stats.tx_bytes); + vxi.tx_packets = u64_stats_read(&stats.tx_packets); if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) goto nla_put_failure; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 0f5e75ccac79..6e53dc991409 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -505,8 +505,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->tx_bytes += skb->len; - stats->tx_packets++; + u64_stats_add(&stats->tx_bytes, skb->len); + u64_stats_inc(&stats->tx_packets); u64_stats_update_end(&stats->syncp); } @@ -624,8 +624,8 @@ static bool __allowed_ingress(const struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb->len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb->len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } @@ -1379,16 +1379,16 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, cpu_stats = per_cpu_ptr(v->stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rxpackets = cpu_stats->rx_packets; - rxbytes = cpu_stats->rx_bytes; - txbytes = cpu_stats->tx_bytes; - txpackets = cpu_stats->tx_packets; + rxpackets = u64_stats_read(&cpu_stats->rx_packets); + rxbytes = u64_stats_read(&cpu_stats->rx_bytes); + txbytes = u64_stats_read(&cpu_stats->tx_bytes); + txpackets = u64_stats_read(&cpu_stats->tx_packets); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->tx_bytes += txbytes; - stats->tx_packets += txpackets; + u64_stats_add(&stats->rx_packets, rxpackets); + u64_stats_add(&stats->rx_bytes, rxbytes); + u64_stats_add(&stats->tx_bytes, txbytes); + u64_stats_add(&stats->tx_packets, txpackets); } } @@ -1779,14 +1779,18 @@ static bool br_vlan_stats_fill(struct sk_buff *skb, return false; br_vlan_get_stats(v, &stats); - if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, + u64_stats_read(&stats.rx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, - stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || - nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + u64_stats_read(&stats.rx_packets), + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, + u64_stats_read(&stats.tx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, - stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + u64_stats_read(&stats.tx_packets), + BRIDGE_VLANDB_STATS_PAD)) goto out_err; nla_nest_end(skb, nest); diff --git a/net/core/dev.c b/net/core/dev.c index c9d96b85a825..511cf5d3aade 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10459,23 +10459,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, int cpu; for_each_possible_cpu(cpu) { + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; const struct pcpu_sw_netstats *stats; - struct pcpu_sw_netstats tmp; unsigned int start; stats = per_cpu_ptr(netstats, cpu); do { start = u64_stats_fetch_begin_irq(&stats->syncp); - tmp.rx_packets = stats->rx_packets; - tmp.rx_bytes = stats->rx_bytes; - tmp.tx_packets = stats->tx_packets; - tmp.tx_bytes = stats->tx_bytes; + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); - s->rx_packets += tmp.rx_packets; - s->rx_bytes += tmp.rx_bytes; - s->tx_packets += tmp.tx_packets; - s->tx_bytes += tmp.tx_bytes; + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; + s->tx_packets += tx_packets; + s->tx_bytes += tx_bytes; } } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 801a5d445833..2e1ac638d135 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -935,10 +935,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, s = per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_irq(&s->syncp); - tx_packets = s->tx_packets; - tx_bytes = s->tx_bytes; - rx_packets = s->rx_packets; - rx_bytes = s->rx_bytes; + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); } while (u64_stats_fetch_retry_irq(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; -- cgit v1.2.3 From 958751e0807d5b2316cc4c7c2834ba9a0fe1b9e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:38 -0700 Subject: devlink: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 5cc88490f18f..db61f3a341cb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7946,8 +7946,8 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, } struct devlink_stats { - u64 rx_bytes; - u64 rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_packets; struct u64_stats_sync syncp; }; @@ -8104,12 +8104,12 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, cpu_stats = per_cpu_ptr(trap_stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rx_packets = cpu_stats->rx_packets; - rx_bytes = cpu_stats->rx_bytes; + rx_packets = u64_stats_read(&cpu_stats->rx_packets); + rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rx_packets; - stats->rx_bytes += rx_bytes; + u64_stats_add(&stats->rx_packets, rx_packets); + u64_stats_add(&stats->rx_bytes, rx_bytes); } } @@ -8127,11 +8127,13 @@ devlink_trap_group_stats_put(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -8171,11 +8173,13 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -11641,8 +11645,8 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, stats = this_cpu_ptr(trap_stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb_len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb_len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } -- cgit v1.2.3 From c6cce71e746800097ad2ee71607d53dc723ad7ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:39 -0700 Subject: drop_monitor: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/drop_monitor.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net/core') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 804d02fc245f..75501e1bdd25 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -55,7 +55,7 @@ static bool monitor_hw; static DEFINE_MUTEX(net_dm_mutex); struct net_dm_stats { - u64 dropped; + u64_stats_t dropped; struct u64_stats_sync syncp; }; @@ -530,7 +530,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, unlock_free: spin_unlock_irqrestore(&data->drop_queue.lock, flags); u64_stats_update_begin(&data->stats.syncp); - data->stats.dropped++; + u64_stats_inc(&data->stats.dropped); u64_stats_update_end(&data->stats.syncp); consume_skb(nskb); } @@ -986,7 +986,7 @@ net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink, unlock_free: spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); u64_stats_update_begin(&hw_data->stats.syncp); - hw_data->stats.dropped++; + u64_stats_inc(&hw_data->stats.dropped); u64_stats_update_end(&hw_data->stats.syncp); net_dm_hw_metadata_free(n_hw_metadata); free: @@ -1433,10 +1433,10 @@ static void net_dm_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1452,7 +1452,7 @@ static int net_dm_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -1477,10 +1477,10 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1496,7 +1496,7 @@ static int net_dm_hw_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); -- cgit v1.2.3 From 63fbdd3c77ec216a91a87d22821d2ecd96669a5d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:31 -0700 Subject: net: use DEBUG_NET_WARN_ON_ONCE() in __release_sock() Check against skb dst in socket backlog has never triggered in past years. Keep the check omly for CONFIG_DEBUG_NET=y builds. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 2ff40dd0a7a6..f5062d9e1222 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2844,7 +2844,7 @@ void __release_sock(struct sock *sk) do { next = skb->next; prefetch(next); - WARN_ON_ONCE(skb_dst_is_noref(skb)); + DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb)); skb_mark_not_on_list(skb); sk_backlog_rcv(sk, skb); -- cgit v1.2.3 From 76458faeb285b1536abc3e75ea318564543269c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:32 -0700 Subject: net: use DEBUG_NET_WARN_ON_ONCE() in dev_loopback_xmit() One check in dev_loopback_xmit() has not caught issues in the past. Keep it for CONFIG_DEBUG_NET=y builds only. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 511cf5d3aade..39cb9055da37 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3925,7 +3925,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_LOOPBACK; if (skb->ip_summed == CHECKSUM_NONE) skb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb)); skb_dst_force(skb); netif_rx(skb); return 0; -- cgit v1.2.3 From c59f02f848672f92bcea90306240822239d68049 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:34 -0700 Subject: net: use WARN_ON_ONCE() in sk_stream_kill_queues() sk_stream_kill_queues() has three checks which have been useful to detect kernel bugs in the past. However they are potentially a problem because they could flood the syslog. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/stream.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/stream.c b/net/core/stream.c index 06b36c730ce8..ccc083cdef23 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -196,13 +196,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); /* Next, the write queue. */ - WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim_final(sk); - WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk->sk_forward_alloc); + WARN_ON_ONCE(sk->sk_wmem_queued); + WARN_ON_ONCE(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket -- cgit v1.2.3 From 7890e2f09d437f79e67cd37bf3e820fa0cddf2be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:36 -0700 Subject: net: use DEBUG_NET_WARN_ON_ONCE() in skb_release_head_state() Remove this check from fast path unless CONFIG_DEBUG_NET=y Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b661040c100e..cf83d9b8f41d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -728,7 +728,7 @@ void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { - WARN_ON(in_hardirq()); + DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) -- cgit v1.2.3 From ee2640df2393141a2975af726802b349d51713be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:37 -0700 Subject: net: add debug checks in napi_consume_skb and __napi_alloc_skb() Commit 6454eca81eae ("net: Use lockdep_assert_in_softirq() in napi_consume_skb()") added a check in napi_consume_skb() which is a bit weak. napi_consume_skb() and __napi_alloc_skb() should only be used from BH context, not from hard irq or nmi context, otherwise we could have races. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cf83d9b8f41d..fec75f8bf1f4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -560,6 +560,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, struct sk_buff *skb; void *data; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, @@ -981,7 +982,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - lockdep_assert_in_softirq(); + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); if (!skb_unref(skb)) return; -- cgit v1.2.3 From fd9ea57f4e9514f9d0f0dec505eefd99a8faa148 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:38 -0700 Subject: net: add napi_get_frags_check() helper This is a follow up of commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") When/if we increase MAX_SKB_FRAGS, we better make sure the old bug will not come back. Adding a check in napi_get_frags() would be costly, even if using DEBUG_NET_WARN_ON_ONCE(). Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/dev.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 39cb9055da37..8958c4227b67 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6351,6 +6351,23 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6378,6 +6395,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). -- cgit v1.2.3 From 100fdd1faf50557558e2911af4be32e515cb8036 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 23:34:07 -0700 Subject: net: remove SK_MEM_QUANTUM and SK_MEM_QUANTUM_SHIFT Due to memcg interface, SK_MEM_QUANTUM is effectively PAGE_SIZE. This might change in the future, but it seems better to avoid the confusion. Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/sock.h | 8 +++----- net/core/sock.c | 16 ++++++++-------- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/udp.c | 10 +++++----- net/mptcp/protocol.c | 8 ++++---- net/sctp/protocol.c | 4 ++-- 8 files changed, 26 insertions(+), 28 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 5c5265269899..298897bbfb3a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1532,8 +1532,6 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind); void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -#define SK_MEM_QUANTUM ((int)PAGE_SIZE) -#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 @@ -1545,7 +1543,7 @@ static inline long sk_prot_mem_limits(const struct sock *sk, int index) static inline int sk_mem_pages(int amt) { - return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; + return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT; } static inline bool sk_has_account(struct sock *sk) @@ -1594,7 +1592,7 @@ static inline void sk_mem_reclaim(struct sock *sk) reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - if (reclaimable >= SK_MEM_QUANTUM) + if (reclaimable >= (int)PAGE_SIZE) __sk_mem_reclaim(sk, reclaimable); } @@ -1613,7 +1611,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk) reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - if (reclaimable > SK_MEM_QUANTUM) + if (reclaimable > (int)PAGE_SIZE) __sk_mem_reclaim(sk, reclaimable - 1); } diff --git a/net/core/sock.c b/net/core/sock.c index f5062d9e1222..063fd7680a84 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -991,7 +991,7 @@ EXPORT_SYMBOL(sock_set_mark); static void sock_release_reserved_memory(struct sock *sk, int bytes) { /* Round down bytes to multiple of pages */ - bytes &= ~(SK_MEM_QUANTUM - 1); + bytes = round_down(bytes, PAGE_SIZE); WARN_ON(bytes > sk->sk_reserved_mem); sk->sk_reserved_mem -= bytes; @@ -1028,9 +1028,9 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } - sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += pages << PAGE_SHIFT; - sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_reserved_mem += pages << PAGE_SHIFT; return 0; } @@ -3003,10 +3003,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += amt << PAGE_SHIFT; ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) - sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amt << PAGE_SHIFT; return ret; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -3034,12 +3034,12 @@ EXPORT_SYMBOL(__sk_mem_reduce_allocated); /** * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of bytes (rounded down to a PAGE_SIZE multiple) */ void __sk_mem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + sk->sk_forward_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9984d23a7f3e..9e696758a4c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4661,11 +4661,11 @@ void __init tcp_init(void) max_wshare = min(4UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit); - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare); - init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_rmem[1] = 131072; init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..3fb117022558 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5287,7 +5287,7 @@ new_range: before(TCP_SKB_CB(skb)->end_seq, start)) { /* Do not attempt collapsing tiny skbs */ if (range_truesize != head->truesize || - end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) { tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); } else { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..8ab98e1aca67 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3367,7 +3367,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size) if (size <= sk->sk_forward_alloc) return; amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk->sk_forward_alloc += amt << PAGE_SHIFT; sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa9f2ec3dc46..bbc9970fa2e9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1461,11 +1461,11 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, sk->sk_forward_alloc += size; - amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); + amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); sk->sk_forward_alloc -= amt; if (amt) - __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); + __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); atomic_sub(size, &sk->sk_rmem_alloc); @@ -1558,7 +1558,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) spin_lock(&list->lock); if (size >= sk->sk_forward_alloc) { amt = sk_mem_pages(size); - delta = amt << SK_MEM_QUANTUM_SHIFT; + delta = amt << PAGE_SHIFT; if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { err = -ENOBUFS; spin_unlock(&list->lock); @@ -3263,8 +3263,8 @@ EXPORT_SYMBOL(udp_flow_hashrnd); static void __udp_sysctl_init(struct net *net) { - net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM; - net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM; + net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE; + net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE; #ifdef CONFIG_NET_L3_MASTER_DEV net->ipv4.sysctl_udp_l3mdev_accept = 0; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 17e13396024a..080a630d6902 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, static void __mptcp_rmem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } @@ -327,7 +327,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) return true; amt = sk_mem_pages(size); - amount = amt << SK_MEM_QUANTUM_SHIFT; + amount = amt << PAGE_SHIFT; msk->rmem_fwd_alloc += amount; if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) { if (ssk->sk_forward_alloc < amount) { @@ -972,7 +972,7 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk) lockdep_assert_held_once(&sk->sk_lock.slock); - if (reclaimable > SK_MEM_QUANTUM) + if (reclaimable > (int)PAGE_SIZE) __mptcp_rmem_reclaim(sk, reclaimable - 1); sk_mem_reclaim_partial(sk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 35928fefae33..fa500ea3a1f1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1523,11 +1523,11 @@ static __init int sctp_init(void) limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); - sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; + sysctl_sctp_wmem[0] = PAGE_SIZE; sysctl_sctp_wmem[1] = 16*1024; sysctl_sctp_wmem[2] = max(64*1024, max_share); -- cgit v1.2.3 From 0defbb0af775ef037913786048d099bbe8b9a2c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 23:34:08 -0700 Subject: net: add per_cpu_fw_alloc field to struct proto Each protocol having a ->memory_allocated pointer gets a corresponding per-cpu reserve, that following patches will use. Instead of having reserved bytes per socket, we want to have per-cpu reserves. Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 + include/net/tcp.h | 2 ++ include/net/udp.h | 1 + net/core/sock.c | 4 ++++ net/decnet/af_decnet.c | 4 ++++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_ipv4.c | 3 +++ net/ipv4/udp.c | 4 ++++ net/ipv4/udplite.c | 3 +++ net/ipv6/tcp_ipv6.c | 3 +++ net/ipv6/udp.c | 3 +++ net/ipv6/udplite.c | 3 +++ net/mptcp/protocol.c | 3 +++ net/sctp/socket.c | 7 +++++++ 14 files changed, 43 insertions(+) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 298897bbfb3a..825f8cbf791f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1254,6 +1254,7 @@ struct proto { void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ + int __percpu *per_cpu_fw_alloc; struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e99f5c61f84..4794cae4577e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -253,6 +253,8 @@ extern long sysctl_tcp_mem[3]; #define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ extern atomic_long_t tcp_memory_allocated; +DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); + extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; diff --git a/include/net/udp.h b/include/net/udp.h index b83a00330566..b60eea2e3fae 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -95,6 +95,7 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; +DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; diff --git a/net/core/sock.c b/net/core/sock.c index 063fd7680a84..f96efc95e334 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3798,6 +3798,10 @@ int proto_register(struct proto *prot, int alloc_slab) pr_err("%s: missing sysctl_mem\n", prot->name); return -EINVAL; } + if (prot->memory_allocated && !prot->per_cpu_fw_alloc) { + pr_err("%s: missing per_cpu_fw_alloc\n", prot->name); + return -EINVAL; + } if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index dc92a67baea3..aa4f43f52499 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -149,6 +149,7 @@ static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; static atomic_long_t decnet_memory_allocated; +static DEFINE_PER_CPU(int, decnet_memory_per_cpu_fw_alloc); static int __dn_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen, int flags); @@ -454,7 +455,10 @@ static struct proto dn_proto = { .owner = THIS_MODULE, .enter_memory_pressure = dn_enter_memory_pressure, .memory_pressure = &dn_memory_pressure, + .memory_allocated = &decnet_memory_allocated, + .per_cpu_fw_alloc = &decnet_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_decnet_mem, .sysctl_wmem = sysctl_decnet_wmem, .sysctl_rmem = sysctl_decnet_rmem, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9e696758a4c2..e6bdf8e2c09a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -294,6 +294,8 @@ EXPORT_SYMBOL(sysctl_tcp_mem); atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); +DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); #if IS_ENABLED(CONFIG_SMC) DEFINE_STATIC_KEY_FALSE(tcp_have_smc); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe8f23b95d32..fda811a5251f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3045,7 +3045,10 @@ struct proto tcp_prot = { .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bbc9970fa2e9..6172b4750a88 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -125,6 +125,8 @@ EXPORT_SYMBOL(sysctl_udp_mem); atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(udp_memory_allocated); +DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); +EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) @@ -2946,6 +2948,8 @@ struct proto udp_prot = { .psock_update_sk_prot = udp_bpf_update_proto, #endif .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index cd1cd68adeec..6e08a76ae1e7 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -51,7 +51,10 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37dd4aa91c6..c72448ba6dc9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2159,7 +2159,10 @@ struct proto tcpv6_prot = { .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 55afd7f39c04..be074f07073a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1740,7 +1740,10 @@ struct proto udpv6_prot = { #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fbb700d3f437..b70725856259 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -48,7 +48,10 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 080a630d6902..9563124ac8af 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3437,7 +3437,10 @@ static struct proto mptcp_prot = { .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6d37d2dfb3da..05174acd981a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; +static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc); struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) @@ -9657,7 +9658,10 @@ struct proto sctp_prot = { .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, + .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, + .sockets_allocated = &sctp_sockets_allocated, }; @@ -9700,7 +9704,10 @@ struct proto sctpv6_prot = { .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, + .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, + .sockets_allocated = &sctp_sockets_allocated, }; #endif /* IS_ENABLED(CONFIG_IPV6) */ -- cgit v1.2.3 From 4890b686f4088c90432149bd6de567e621266fa2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 23:34:11 -0700 Subject: net: keep sk->sk_forward_alloc as small as possible Currently, tcp_memory_allocated can hit tcp_mem[] limits quite fast. Each TCP socket can forward allocate up to 2 MB of memory, even after flow became less active. 10,000 sockets can have reserved 20 GB of memory, and we have no shrinker in place to reclaim that. Instead of trying to reclaim the extra allocations in some places, just keep sk->sk_forward_alloc values as small as possible. This should not impact performance too much now we have per-cpu reserves: Changes to tcp_memory_allocated should not be too frequent. For sockets not using SO_RESERVE_MEM: - idle sockets (no packets in tx/rx queues) have zero forward alloc. - non idle sockets have a forward alloc smaller than one page. Note: - Removal of SK_RECLAIM_CHUNK and SK_RECLAIM_THRESHOLD is left to MPTCP maintainers as a follow up. Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/sock.h | 29 ++--------------------------- net/core/datagram.c | 3 --- net/ipv4/tcp.c | 7 ------- net/ipv4/tcp_input.c | 4 ---- net/ipv4/tcp_timer.c | 19 ++++--------------- net/iucv/af_iucv.c | 2 -- net/mptcp/protocol.c | 2 +- net/sctp/sm_statefuns.c | 2 -- net/sctp/socket.c | 5 ----- net/sctp/stream_interleave.c | 2 -- net/sctp/ulpqueue.c | 4 ---- 11 files changed, 7 insertions(+), 72 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index cf288f7e9019..0063e8410a4e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1627,19 +1627,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk) sk_mem_reclaim(sk); } -static inline void sk_mem_reclaim_partial(struct sock *sk) -{ - int reclaimable; - - if (!sk_has_account(sk)) - return; - - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - if (reclaimable > (int)PAGE_SIZE) - __sk_mem_reclaim(sk, reclaimable - 1); -} - static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) @@ -1647,29 +1634,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } -/* the following macros control memory reclaiming in sk_mem_uncharge() +/* the following macros control memory reclaiming in mptcp_rmem_uncharge() */ #define SK_RECLAIM_THRESHOLD (1 << 21) #define SK_RECLAIM_CHUNK (1 << 20) static inline void sk_mem_uncharge(struct sock *sk, int size) { - int reclaimable; - if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - /* Avoid a possible overflow. - * TCP send queues can make this happen, if sk_mem_reclaim() - * is not called and more than 2 GBytes are released at once. - * - * If we reach 2 MBytes, reclaim 1 MBytes right now, there is - * no need to hold that much forward allocation anyway. - */ - if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) - __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); + sk_mem_reclaim(sk); } /* diff --git a/net/core/datagram.c b/net/core/datagram.c index 50f4faeea76c..35791f86bd1a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { consume_skb(skb); - sk_mem_reclaim_partial(sk); } EXPORT_SYMBOL(skb_free_datagram); @@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); skb_orphan(skb); - sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ @@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) NULL); kfree_skb(skb); - sk_mem_reclaim_partial(sk); return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e6bdf8e2c09a..14ebb4ec4a51 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -858,9 +858,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; - if (unlikely(tcp_under_memory_pressure(sk))) - sk_mem_reclaim_partial(sk); - skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); if (likely(skb)) { bool mem_scheduled; @@ -2764,8 +2761,6 @@ void __tcp_close(struct sock *sk, long timeout) __kfree_skb(skb); } - sk_mem_reclaim(sk); - /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ if (sk->sk_state == TCP_CLOSE) goto adjudge_to_death; @@ -2873,7 +2868,6 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - sk_mem_reclaim(sk); if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); @@ -2951,7 +2945,6 @@ void tcp_write_queue_purge(struct sock *sk) } tcp_rtx_queue_purge(sk); INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); - sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); tcp_sk(sk)->packets_out = 0; inet_csk(sk)->icsk_backoff = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3fb117022558..fdc7beb81b68 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,7 +805,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * restart window, so that we send ACKs quickly. */ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); - sk_mem_reclaim(sk); } } icsk->icsk_ack.lrcvtime = now; @@ -4390,7 +4389,6 @@ void tcp_fin(struct sock *sk) skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_mem_reclaim(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -5336,7 +5334,6 @@ static bool tcp_prune_ofo_queue(struct sock *sk) tcp_drop_reason(sk, rb_to_skb(node), SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); if (!prev || goal <= 0) { - sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; @@ -5383,7 +5380,6 @@ static int tcp_prune_queue(struct sock *sk) skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); - sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d..2208755e8efc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -290,15 +290,13 @@ void tcp_delack_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - sk_mem_reclaim_partial(sk); - if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) - goto out; + return; if (time_after(icsk->icsk_ack.timeout, jiffies)) { sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); - goto out; + return; } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; @@ -317,10 +315,6 @@ void tcp_delack_timer_handler(struct sock *sk) tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } - -out: - if (tcp_under_memory_pressure(sk)) - sk_mem_reclaim(sk); } @@ -600,11 +594,11 @@ void tcp_write_timer_handler(struct sock *sk) if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending) - goto out; + return; if (time_after(icsk->icsk_timeout, jiffies)) { sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); - goto out; + return; } tcp_mstamp_refresh(tcp_sk(sk)); @@ -626,9 +620,6 @@ void tcp_write_timer_handler(struct sock *sk) tcp_probe_timer(sk); break; } - -out: - sk_mem_reclaim(sk); } static void tcp_write_timer(struct timer_list *t) @@ -743,8 +734,6 @@ static void tcp_keepalive_timer (struct timer_list *t) elapsed = keepalive_time_when(tp) - elapsed; } - sk_mem_reclaim(sk); - resched: inet_csk_reset_keepalive_timer (sk, elapsed); goto out; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a0385ddbffcf..498a0c35b7bb 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -278,8 +278,6 @@ static void iucv_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_error_queue); - sk_mem_reclaim(sk); - if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive iucv socket %p\n", sk); return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9563124ac8af..e0fb9f96c45c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -975,7 +975,7 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk) if (reclaimable > (int)PAGE_SIZE) __mptcp_rmem_reclaim(sk, reclaimable - 1); - sk_mem_reclaim_partial(sk); + sk_mem_reclaim(sk); } static void mptcp_mem_reclaim_partial(struct sock *sk) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 52edee1322fc..f6ee7f4040c1 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6590,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; - } else { - sk_mem_reclaim(sk); } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 05174acd981a..171f1a35d205 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1824,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - if (sk_under_memory_pressure(sk)) - sk_mem_reclaim(sk); - if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); @@ -9195,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, goto do_error; if (signal_pending(current)) goto do_interrupted; - if (sk_under_memory_pressure(sk)) - sk_mem_reclaim(sk); if ((int)msg_len <= sctp_wspace(asoc) && sk_wmem_schedule(sk, msg_len)) break; diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 6b13f737ebf2..bb22b71df7a3 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0) sctp_intl_start_pd(ulpq, gfp); - - sk_mem_reclaim(asoc->base.sk); } static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid, diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 407fed46931b..0a8510a0c5e6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1100,12 +1100,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, else if (retval == 1) sctp_ulpq_reasm_drain(ulpq); } - - sk_mem_reclaim(asoc->base.sk); } - - /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ -- cgit v1.2.3 From 0f2c2693988aeeb4c83a581fe58a28d526eecd39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 23:34:12 -0700 Subject: net: unexport __sk_mem_{raise|reduce}_allocated These two helpers are only used from core networking. Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- net/core/sock.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index f96efc95e334..697d5c8e2f0d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2987,7 +2987,6 @@ suppress_allocation: return 0; } -EXPORT_SYMBOL(__sk_mem_raise_allocated); /** * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated @@ -3029,7 +3028,6 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } -EXPORT_SYMBOL(__sk_mem_reduce_allocated); /** * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated -- cgit v1.2.3 From 219160be496f7f9cd105c5708e37cf22ab4ce0c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Jun 2022 20:30:16 -0700 Subject: tcp: sk_forced_mem_schedule() optimization sk_memory_allocated_add() has three callers, and returns to them @memory_allocated. sk_forced_mem_schedule() is one of them, and ignores the returned value. Change sk_memory_allocated_add() to return void. Change sock_reserve_memory() and __sk_mem_raise_allocated() to call sk_memory_allocated(). This removes one cache line miss [1] for RPC workloads, as first skbs in TCP write queue and receive queue go through sk_forced_mem_schedule(). [1] Cache line holding tcp_memory_allocated. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Signed-off-by: David S. Miller --- include/net/sock.h | 3 +-- net/core/sock.c | 9 ++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 0063e8410a4e..304a5e39d41e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1412,7 +1412,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) -static inline long +static inline void sk_memory_allocated_add(struct sock *sk, int amt) { int local_reserve; @@ -1424,7 +1424,6 @@ sk_memory_allocated_add(struct sock *sk, int amt) atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } preempt_enable(); - return sk_memory_allocated(sk); } static inline void diff --git a/net/core/sock.c b/net/core/sock.c index 697d5c8e2f0d..92a0296ccb18 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1019,7 +1019,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return -ENOMEM; /* pre-charge to forward_alloc */ - allocated = sk_memory_allocated_add(sk, pages); + sk_memory_allocated_add(sk, pages); + allocated = sk_memory_allocated(sk); /* If the system goes into memory pressure with this * precharge, give up and return error. */ @@ -2906,11 +2907,13 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - struct proto *prot = sk->sk_prot; - long allocated = sk_memory_allocated_add(sk, amt); bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; + struct proto *prot = sk->sk_prot; bool charged = true; + long allocated; + sk_memory_allocated_add(sk, amt); + allocated = sk_memory_allocated(sk); if (memcg_charge && !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge()))) -- cgit v1.2.3 From 49ae83fc4fd027a4b4cf56bd2c94c7814ec4baff Mon Sep 17 00:00:00 2001 From: Sieng Piaw Liew Date: Wed, 15 Jun 2022 11:24:26 +0800 Subject: net: don't check skb_count twice NAPI cache skb_count is being checked twice without condition. Change to checking the second time only if the first check is run. Signed-off-by: Sieng Piaw Liew Signed-off-by: David S. Miller --- net/core/skbuff.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fec75f8bf1f4..00bf35ee8205 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -175,13 +175,14 @@ static struct sk_buff *napi_skb_cache_get(void) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; - if (unlikely(!nc->skb_count)) + if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); - if (unlikely(!nc->skb_count)) - return NULL; + if (unlikely(!nc->skb_count)) + return NULL; + } skb = nc->skb_cache[--nc->skb_count]; kasan_unpoison_object_data(skbuff_head_cache, skb); -- cgit v1.2.3 From 33bf9885040c399cf6a95bd33216644126728e14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:44 +0300 Subject: bpf: Add helpers to issue and check SYN cookies in XDP The new helpers bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6} allow an XDP program to generate SYN cookies in response to TCP SYN packets and to check those cookies upon receiving the first ACK packet (the final packet of the TCP handshake). Unlike bpf_tcp_{gen,check}_syncookie these new helpers don't need a listening socket on the local machine, which allows to use them together with synproxy to accelerate SYN cookie generation. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-4-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 1 + include/uapi/linux/bpf.h | 78 +++++++++++++++++++++++++++ net/core/filter.c | 118 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 3 +- scripts/bpf_doc.py | 4 ++ tools/include/uapi/linux/bpf.h | 78 +++++++++++++++++++++++++++ 6 files changed, 281 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e99f5c61f84..9a1efe23fab7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -432,6 +432,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, struct tcphdr *th, u32 *cookie); u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie); +u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss); u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct tcphdr *th); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f545e39df72a..e81362891596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5251,6 +5251,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5457,6 +5531,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index 5af58eb48587..b62d4126a561 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7444,6 +7444,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_SYN_COOKIES +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th, u32, th_len) +{ + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT; + cookie = __cookie_v4_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th, u32, th_len) +{ +#if IS_BUILTIN(CONFIG_IPV6) + const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr); + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: mss_clamp; + cookie = __cookie_v6_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th) +{ + u32 cookie = ntohl(th->ack_seq) - 1; + + if (__cookie_v4_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_check is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th) +{ +#if IS_BUILTIN(CONFIG_IPV6) + u32 cookie = ntohl(th->ack_seq) - 1; + + if (__cookie_v6_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_check is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; +#endif /* CONFIG_SYN_COOKIES */ + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7856,6 +7964,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..6426f6a2e744 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3967,7 +3967,7 @@ static bool smc_parse_options(const struct tcphdr *th, /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped * value on success. */ -static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) { const unsigned char *ptr = (const unsigned char *)(th + 1); int length = (th->doff * 4) - sizeof(struct tcphdr); @@ -4006,6 +4006,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) } return mss; } +EXPORT_SYMBOL_GPL(tcp_parse_mss_option); /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 855b937e7585..a0ec321469bd 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -635,6 +635,8 @@ class PrinterHelpers(Printer): 'struct bpf_timer', 'struct mptcp_sock', 'struct bpf_dynptr', + 'struct iphdr', + 'struct ipv6hdr', ] known_types = { '...', @@ -686,6 +688,8 @@ class PrinterHelpers(Printer): 'struct bpf_timer', 'struct mptcp_sock', 'struct bpf_dynptr', + 'struct iphdr', + 'struct ipv6hdr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f545e39df72a..e81362891596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5251,6 +5251,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5457,6 +5531,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 9a4cf073866c414199be52bdac1afe6f72ecb8e1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:46 +0300 Subject: bpf: Allow the new syncookie helpers to work with SKBs This commit allows the new BPF helpers to work in SKB context (in TC BPF programs): bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6}. Using these helpers in TC BPF programs is not recommended, because it's unlikely that the BPF program will provide any substantional speedup compared to regular SYN cookies or synproxy, after the SKB is already created. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-6-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index b62d4126a561..423f47db84c6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7915,6 +7915,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_assign_proto; case BPF_FUNC_skb_set_tstamp: return &bpf_skb_set_tstamp_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); -- cgit v1.2.3 From f5be22c64bd6ee6c1cb0b34f4ff748d43879cd4c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Jun 2022 17:21:21 +0200 Subject: bpf: Fix bpf_skc_lookup comment wrt. return type The function no longer returns 'unsigned long' as of commit edbf8c01de5a ("bpf: add skc_lookup_tcp helper"). Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220617152121.29617-1-tklauser@distanz.ch --- net/core/filter.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 423f47db84c6..151aa4756bd6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6463,8 +6463,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, /* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. - * Returns the socket as an 'unsigned long' to simplify the casting in the - * callers to satisfy BPF_CALL declarations. */ static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, -- cgit v1.2.3 From 965b57b469a589d64d81b1688b38dcb537011bb0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jun 2022 09:20:12 -0700 Subject: net: Introduce a new proto_ops ->read_skb() Currently both splice() and sockmap use ->read_sock() to read skb from receive queue, but for sockmap we only read one entire skb at a time, so ->read_sock() is too conservative to use. Introduce a new proto_ops ->read_skb() which supports this sematic, with this we can finally pass the ownership of skb to recv actors. For non-TCP protocols, all ->read_sock() can be simply converted to ->read_skb(). Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20220615162014.89193-3-xiyou.wangcong@gmail.com --- include/linux/net.h | 4 ++++ include/net/tcp.h | 3 +-- include/net/udp.h | 3 +-- net/core/skmsg.c | 20 +++++--------------- net/ipv4/af_inet.c | 3 ++- net/ipv4/tcp.c | 9 +++------ net/ipv4/udp.c | 10 ++++------ net/ipv6/af_inet6.c | 3 ++- net/unix/af_unix.c | 23 +++++++++-------------- 9 files changed, 31 insertions(+), 47 deletions(-) (limited to 'net/core') diff --git a/include/linux/net.h b/include/linux/net.h index 12093f4db50c..a03485e8cbb2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -152,6 +152,8 @@ struct module; struct sk_buff; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); +typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); + struct proto_ops { int family; @@ -214,6 +216,8 @@ struct proto_ops { */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + /* This is different from read_sock(), it reads an entire skb at a time. */ + int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); int (*sendpage_locked)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, diff --git a/include/net/tcp.h b/include/net/tcp.h index 7547d90fbb57..8e48dc56837b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -672,8 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); void tcp_initialize_rcv_mss(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index b60eea2e3fae..987f7fc7c0aa 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 7e03f96e441b..f7f63b7d990c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1160,21 +1160,17 @@ static void sk_psock_done_strp(struct sk_psock *psock) } #endif /* CONFIG_BPF_STREAM_PARSER */ -static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t orig_len) +static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = (struct sock *)desc->arg.data; struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = orig_len; + int len = skb->len; /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) { - desc->error = -ENOMEM; + if (!skb) return 0; - } rcu_read_lock(); psock = sk_psock(sk); @@ -1204,16 +1200,10 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; - read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) return; - - desc.arg.data = sk; - desc.error = 0; - desc.count = 1; - - sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); + sock->ops->read_skb(sk, sk_psock_verdict_recv); } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index da81f56fdd1c..7abd652a558f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = { .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, @@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 124f384f8695..9d2fd3ced21b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1734,8 +1734,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); -int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct tcp_sock *tp = tcp_sk(sk); u32 seq = tp->copied_seq; @@ -1750,7 +1749,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, int used; __skb_unlink(skb, &sk->sk_receive_queue); - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -1765,9 +1764,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, break; } consume_skb(skb); - if (!desc->count) - break; - WRITE_ONCE(tp->copied_seq, seq); + break; } WRITE_ONCE(tp->copied_seq, seq); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6172b4750a88..c660b0bc4d14 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1797,8 +1797,7 @@ busy_check: } EXPORT_SYMBOL(__skb_recv_udp); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -1820,7 +1819,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -1831,13 +1830,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; } -EXPORT_SYMBOL(udp_read_sock); +EXPORT_SYMBOL(udp_read_skb); /* * This should be easy, if there is something there we diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 658823e91eca..0ee0770e79aa 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = { .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, @@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3453e0053f76..1bed3739768c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -741,10 +741,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -798,7 +796,7 @@ static const struct proto_ops unix_stream_ops = { .shutdown = unix_shutdown, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, - .read_sock = unix_stream_read_sock, + .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, @@ -823,7 +821,7 @@ static const struct proto_ops unix_dgram_ops = { .listen = sock_no_listen, .shutdown = unix_shutdown, .sendmsg = unix_dgram_sendmsg, - .read_sock = unix_read_sock, + .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, @@ -2487,8 +2485,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si return __unix_dgram_recvmsg(sk, msg, size, flags); } -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -2503,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, if (!skb) return err; - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -2514,8 +2511,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; @@ -2650,13 +2646,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } #endif -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { if (unlikely(sk->sk_state != TCP_ESTABLISHED)) return -ENOTCONN; - return unix_read_sock(sk, desc, recv_actor); + return unix_read_skb(sk, recv_actor); } static int unix_stream_read_generic(struct unix_stream_read_state *state, -- cgit v1.2.3 From 57452d767feaeab405de3bff0d240c3ac84bfe0d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jun 2022 09:20:13 -0700 Subject: skmsg: Get rid of skb_clone() With ->read_skb() now we have an entire skb dequeued from receive queue, now we just need to grab an addtional refcnt before passing its ownership to recv actors. And we should not touch them any more, particularly for skb->sk. Fortunately, skb->sk is already set for most of the protocols except UDP where skb->sk has been stolen, so we have to fix it up for UDP case. Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20220615162014.89193-4-xiyou.wangcong@gmail.com --- net/core/skmsg.c | 7 +------ net/ipv4/udp.c | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f7f63b7d990c..8b248d289c11 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1167,10 +1167,7 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) int ret = __SK_DROP; int len = skb->len; - /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - return 0; + skb_get(skb); rcu_read_lock(); psock = sk_psock(sk); @@ -1183,12 +1180,10 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) if (!prog) prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { - skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); - skb->sk = NULL; } if (sk_psock_verdict_apply(psock, skb, ret) < 0) len = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c660b0bc4d14..2516078aa03e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1819,6 +1819,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) continue; } + WARN_ON(!skb_set_owner_sk_safe(skb, sk)); used = recv_actor(sk, skb); if (used <= 0) { if (!copied) -- cgit v1.2.3 From 43312915b5ba20741617dd2119e835205fa8580c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jun 2022 09:20:14 -0700 Subject: skmsg: Get rid of unncessary memset() We always allocate skmsg with kzalloc(), so there is no need to call memset(0) on it, the only thing we need from sk_msg_init() is sg_init_marker(). So introduce a new helper which is just kzalloc()+sg_init_marker(), this saves an unncessary memset(0) for skmsg on fast path. Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20220615162014.89193-5-xiyou.wangcong@gmail.com --- net/core/skmsg.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 8b248d289c11..4b297d67edb7 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_msg_is_readable); -static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, - struct sk_buff *skb) +static struct sk_msg *alloc_sk_msg(void) { struct sk_msg *msg; - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); + if (unlikely(!msg)) return NULL; + sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); + return msg; +} - if (!sk_rmem_schedule(sk, skb, skb->truesize)) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, + struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return NULL; - msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); - if (unlikely(!msg)) + if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; - sk_msg_init(msg); - return msg; + return alloc_sk_msg(); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, @@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { - struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + struct sk_msg *msg = alloc_sk_msg(); struct sock *sk = psock->sk; int err; if (unlikely(!msg)) return -EAGAIN; - sk_msg_init(msg); skb_set_owner_r(skb, sk); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) -- cgit v1.2.3 From ede57d58e6f38d5bc66137368e4a1e68a157af6e Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Wed, 22 Jun 2022 18:09:03 +0200 Subject: net: helper function skb_len_add Move the len fields manipulation in the skbs to a helper function. There is a comment specifically requesting this and there are several other areas in the code displaying the same pattern which can be refactored. This improves code readability. Signed-off-by: Richard Gobert Link: https://lore.kernel.org/r/20220622160853.GA6478@debian Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++++ include/net/sock.h | 4 +--- net/core/skbuff.c | 13 +++---------- net/ipv4/esp4.c | 4 +--- net/ipv4/ip_output.c | 8 ++------ 5 files changed, 19 insertions(+), 22 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cd4a8268894a..f6a27ab19202 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2351,6 +2351,18 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +/** + * skb_len_add - adds a number to len fields of skb + * @skb: buffer to add len to + * @delta: number of bytes to add + */ +static inline void skb_len_add(struct sk_buff *skb, int delta) +{ + skb->len += delta; + skb->data_len += delta; + skb->truesize += delta; +} + /** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised diff --git a/include/net/sock.h b/include/net/sock.h index 5bed1ea7a722..40bbd0e8925b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2219,9 +2219,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro if (err) return err; - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00bf35ee8205..c62e42d0c531 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3195,9 +3195,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) } } - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; + skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); @@ -3634,13 +3632,8 @@ onlymerged: tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; - /* Yak, is it really working this way? Some helper please? */ - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; + skb_len_add(skb, -shiftlen); + skb_len_add(tgt, shiftlen); return shiftlen; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b21238df3301..7eae8d686e20 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,9 +502,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * nfrags++; - skb->len += tailen; - skb->data_len += tailen; - skb->truesize += tailen; + skb_len_add(skb, tailen); if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 00b4bf26fd93..5e32a2f86fbd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1214,9 +1214,7 @@ alloc_new_skb: pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); wmem_alloc_delta += copy; } else { err = skb_zerocopy_iter_dgram(skb, from, copy); @@ -1443,9 +1441,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->csum = csum_block_add(skb->csum, csum, skb->len); } - skb->len += len; - skb->data_len += len; - skb->truesize += len; + skb_len_add(skb, len); refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; -- cgit v1.2.3 From 697fb80a53642be624f5121b6ca9d66769c180e0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 27 Jun 2022 20:58:03 -0700 Subject: bpf: Fix sockmap calling sleepable function in teardown path syzbot reproduced the bug ... BUG: sleeping function called from invalid context at kernel/workqueue.c:3010 ... with the following stack trace fragment ... start_flush_work kernel/workqueue.c:3010 [inline] __flush_work+0x109/0xb10 kernel/workqueue.c:3074 __cancel_work_timer+0x3f9/0x570 kernel/workqueue.c:3162 sk_psock_stop+0x4cb/0x630 net/core/skmsg.c:802 sock_map_destroy+0x333/0x760 net/core/sock_map.c:1581 inet_csk_destroy_sock+0x196/0x440 net/ipv4/inet_connection_sock.c:1130 __tcp_close+0xd5b/0x12b0 net/ipv4/tcp.c:2897 tcp_close+0x29/0xc0 net/ipv4/tcp.c:2909 ... introduced by d8616ee2affc. Do a quick trace of the code path and the bug is obvious: inet_csk_destroy_sock(sk) sk_prot->destroy(sk); <--- sock_map_destroy sk_psock_stop(, true); <--- true so cancel workqueue cancel_work_sync() <--- splat, because *_bh_disable() We can not call cancel_work_sync() from inside destroy path. So mark the sk_psock_stop call to skip this cancel_work_sync(). This will avoid the BUG, but means we may run sk_psock_backlog after or during the destroy op. We zapped the ingress_skb queue in sk_psock_stop (safe to do with local_bh_disable) so its empty and the sk_psock_backlog work item will not find any pkts to process here. However, because we are not going to wait for it or clear its ->state its possible it kicks off or is already running. This should be 'safe' up until psock drops its refcnt to psock->sk. The sock_put() that drops this reference is only done at psock destroy time from sk_psock_destroy(). This is done through workqueue when sk_psock_drop() is called on psock refnt reaches 0. And importantly sk_psock_destroy() does a cancel_work_sync(). So trivial fix works. I've had hit or miss luck reproducing this caught it once or twice with the provided reproducer when running with many runners. However, syzkaller is very good at reproducing so relying on syzkaller to verify fix. Fixes: d8616ee2affc ("bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues") Reported-by: syzbot+140186ceba0c496183bc@syzkaller.appspotmail.com Suggested-by: Hillf Danton Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Cc: Wang Yufen Link: https://lore.kernel.org/bpf/20220628035803.317876-1-john.fastabend@gmail.com --- net/core/sock_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9f08ccfaf6da..028813dfecb0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1578,7 +1578,7 @@ void sock_map_destroy(struct sock *sk) saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, true); + sk_psock_stop(psock, false); sk_psock_put(sk, psock); saved_destroy(sk); } -- cgit v1.2.3 From 9113d7e48e9128522b9f5a54dfd30dff10509a92 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:09 -0700 Subject: bpf: expose bpf_{g,s}etsockopt to lsm cgroup I don't see how to make it nice without introducing btf id lists for the hooks where these helpers are allowed. Some LSM hooks work on the locked sockets, some are triggering early and don't grab any locks, so have two lists for now: 1. LSM hooks which trigger under socket lock - minority of the hooks, but ideal case for us, we can expose existing BTF-based helpers 2. LSM hooks which trigger without socket lock, but they trigger early in the socket creation path where it should be safe to do setsockopt without any locks 3. The rest are prohibited. I'm thinking that this use-case might be a good gateway to sleeping lsm cgroup hooks in the future. We can either expose lock/unlock operations (and add tracking to the verifier) or have another set of bpf_setsockopt wrapper that grab the locks and might sleep. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-7-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/bpf_lsm.c | 38 +++++++++++++++++++++++++++++++++ net/core/filter.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 93 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5d2afa55c7c3..2b21f2a3452f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2386,6 +2386,8 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 83aa431dd52e..d469b7f3deef 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -45,6 +45,24 @@ BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) BTF_SET_END(bpf_lsm_current_hooks) +/* List of LSM hooks that trigger while the socket is properly locked. + */ +BTF_SET_START(bpf_lsm_locked_sockopt_hooks) +BTF_ID(func, bpf_lsm_socket_sock_rcv_skb) +BTF_ID(func, bpf_lsm_sock_graft) +BTF_ID(func, bpf_lsm_inet_csk_clone) +BTF_ID(func, bpf_lsm_inet_conn_established) +BTF_SET_END(bpf_lsm_locked_sockopt_hooks) + +/* List of LSM hooks that trigger while the socket is _not_ locked, + * but it's ok to call bpf_{g,s}etsockopt because the socket is still + * in the early init phase. + */ +BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks) +BTF_ID(func, bpf_lsm_socket_post_create) +BTF_ID(func, bpf_lsm_socket_socketpair) +BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks) + void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) { @@ -201,6 +219,26 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_retval: return prog->expected_attach_type == BPF_LSM_CGROUP ? &bpf_get_retval_proto : NULL; + case BPF_FUNC_setsockopt: + if (prog->expected_attach_type != BPF_LSM_CGROUP) + return NULL; + if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_sk_setsockopt_proto; + if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_unlocked_sk_setsockopt_proto; + return NULL; + case BPF_FUNC_getsockopt: + if (prog->expected_attach_type != BPF_LSM_CGROUP) + return NULL; + if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_sk_getsockopt_proto; + if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_unlocked_sk_getsockopt_proto; + return NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/net/core/filter.c b/net/core/filter.c index 151aa4756bd6..c6941ab0eb52 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5012,8 +5012,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static int _bpf_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int __bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { char devname[IFNAMSIZ]; int val, valbool; @@ -5024,8 +5024,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (!sk_fullsock(sk)) return -EINVAL; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; @@ -5258,14 +5256,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, return ret; } -static int _bpf_getsockopt(struct sock *sk, int level, int optname, +static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +static int __bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { if (!sk_fullsock(sk)) goto err_clear; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int)) goto err_clear; @@ -5360,6 +5364,14 @@ err_clear: return -EINVAL; } +static int _bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5400,6 +5412,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = { + .func = bpf_unlocked_sk_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = { + .func = bpf_unlocked_sk_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { -- cgit v1.2.3 From 211da42eaa45db7b0edfde187dd88a85fbd466b5 Mon Sep 17 00:00:00 2001 From: Yuwei Wang Date: Wed, 29 Jun 2022 08:48:32 +0000 Subject: net, neigh: introduce interval_probe_time_ms for periodic probe commit ed6cd6a17896 ("net, neigh: Set lower cap for neigh_managed_work rearming") fixed a case when DELAY_PROBE_TIME is configured to 0, the processing of the system work queue hog CPU to 100%, and further more we should introduce a new option used by periodic probe Signed-off-by: Yuwei Wang Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 6 ++++++ include/net/neighbour.h | 1 + include/uapi/linux/neighbour.h | 1 + include/uapi/linux/sysctl.h | 37 +++++++++++++++++----------------- net/core/neighbour.c | 32 +++++++++++++++++++++++++++-- net/decnet/dn_neigh.c | 1 + net/ipv4/arp.c | 1 + net/ipv6/ndisc.c | 1 + 8 files changed, 60 insertions(+), 20 deletions(-) (limited to 'net/core') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 9f41961d11d5..4c8bbf5acfd1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -202,6 +202,12 @@ neigh/default/unres_qlen - INTEGER Default: 101 +neigh/default/interval_probe_time_ms - INTEGER + The probe interval for neighbor entries with NTF_MANAGED flag, + the min value is 1. + + Default: 5000 + mtu_expires - INTEGER Time, in seconds, that cached PMTU information is kept. diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 87419f7f5421..9f0bab0589d9 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -48,6 +48,7 @@ enum { NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, + NEIGH_VAR_INTERVAL_PROBE_TIME_MS, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 39c565e460c7..a998bf761635 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -154,6 +154,7 @@ enum { NDTPA_QUEUE_LENBYTES, /* u32 */ NDTPA_MCAST_REPROBES, /* u32 */ NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6a3b194c50fe..8981f00204db 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -584,24 +584,25 @@ enum { /* /proc/sys/net//neigh/ */ enum { - NET_NEIGH_MCAST_SOLICIT=1, - NET_NEIGH_UCAST_SOLICIT=2, - NET_NEIGH_APP_SOLICIT=3, - NET_NEIGH_RETRANS_TIME=4, - NET_NEIGH_REACHABLE_TIME=5, - NET_NEIGH_DELAY_PROBE_TIME=6, - NET_NEIGH_GC_STALE_TIME=7, - NET_NEIGH_UNRES_QLEN=8, - NET_NEIGH_PROXY_QLEN=9, - NET_NEIGH_ANYCAST_DELAY=10, - NET_NEIGH_PROXY_DELAY=11, - NET_NEIGH_LOCKTIME=12, - NET_NEIGH_GC_INTERVAL=13, - NET_NEIGH_GC_THRESH1=14, - NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16, - NET_NEIGH_RETRANS_TIME_MS=17, - NET_NEIGH_REACHABLE_TIME_MS=18, + NET_NEIGH_MCAST_SOLICIT = 1, + NET_NEIGH_UCAST_SOLICIT = 2, + NET_NEIGH_APP_SOLICIT = 3, + NET_NEIGH_RETRANS_TIME = 4, + NET_NEIGH_REACHABLE_TIME = 5, + NET_NEIGH_DELAY_PROBE_TIME = 6, + NET_NEIGH_GC_STALE_TIME = 7, + NET_NEIGH_UNRES_QLEN = 8, + NET_NEIGH_PROXY_QLEN = 9, + NET_NEIGH_ANYCAST_DELAY = 10, + NET_NEIGH_PROXY_DELAY = 11, + NET_NEIGH_LOCKTIME = 12, + NET_NEIGH_GC_INTERVAL = 13, + NET_NEIGH_GC_THRESH1 = 14, + NET_NEIGH_GC_THRESH2 = 15, + NET_NEIGH_GC_THRESH3 = 16, + NET_NEIGH_RETRANS_TIME_MS = 17, + NET_NEIGH_REACHABLE_TIME_MS = 18, + NET_NEIGH_INTERVAL_PROBE_TIME_MS = 19, }; /* /proc/sys/net/dccp */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d8ec70622ecb..6a8c2596ebab 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1579,7 +1579,7 @@ static void neigh_managed_work(struct work_struct *work) list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, - max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ)); + NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS)); write_unlock_bh(&tbl->lock); } @@ -2100,7 +2100,9 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) nla_put_msecs(skb, NDTPA_PROXY_DELAY, NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, - NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) + NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) || + nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS, + NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -2255,6 +2257,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, [NDTPA_LOCKTIME] = { .type = NLA_U64 }, + [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 }, }; static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2373,6 +2376,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, nla_get_msecs(tbp[i])); call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; + case NDTPA_INTERVAL_PROBE_TIME_MS: + NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS, + nla_get_msecs(tbp[i])); + break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, nla_get_msecs(tbp[i])); @@ -3562,6 +3569,22 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, return ret; } +static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *ctl; + int ret; + + int min = msecs_to_jiffies(1); + + tmp.extra1 = &min; + tmp.extra2 = NULL; + + ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos); + neigh_proc_update(ctl, write); + return ret; +} + int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -3658,6 +3681,9 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) +#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive) + #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) @@ -3676,6 +3702,8 @@ static struct neigh_sysctl_table { NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), + NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS, + "interval_probe_time_ms"), NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index fbd98ac853ea..7c569bcc0aca 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -94,6 +94,7 @@ struct neigh_table dn_neigh_table = { [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 0, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ab4a5601c82a..af2f12ffc9ca 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -168,6 +168,7 @@ struct neigh_table arp_tbl = { [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b0dfe97ea4ee..cd84cbdac0a2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -128,6 +128,7 @@ struct neigh_table nd_tbl = { [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, -- cgit v1.2.3 From 2064a132c0de3426d5ba43023200994e0c77e652 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 30 Jun 2022 10:26:18 +0200 Subject: bpf: Omit superfluous address family check in __bpf_skc_lookup family is only set to either AF_INET or AF_INET6 based on len. In all other cases we return early. Thus the check against AF_UNSPEC can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220630082618.15649-1-tklauser@distanz.ch --- net/core/filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index c6941ab0eb52..4fae91984359 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6516,8 +6516,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, u64 flags) { struct sock *sk = NULL; - u8 family = AF_UNSPEC; struct net *net; + u8 family; int sdif; if (len == sizeof(tuple->ipv4)) @@ -6527,8 +6527,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, else return NULL; - if (unlikely(family == AF_UNSPEC || flags || - !((s32)netns_id < 0 || netns_id <= S32_MAX))) + if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (family == AF_INET) -- cgit v1.2.3 From c46b01839f7aad5889e23505bbfbeb5f4d7fde8e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jul 2022 16:59:26 -0700 Subject: tls: rx: periodically flush socket backlog We continuously hold the socket lock during large reads and writes. This may inflate RTT and negatively impact TCP performance. Flush the backlog periodically. I tried to pick a flush period (128kB) which gives significant benefit but the max Bps rate is not yet visibly impacted. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/sock.c | 1 + net/tls/tls_sw.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 92a0296ccb18..4cb957d934a2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2870,6 +2870,7 @@ void __sk_flush_backlog(struct sock *sk) __release_sock(sk); spin_unlock_bh(&sk->sk_lock.slock); } +EXPORT_SYMBOL_GPL(__sk_flush_backlog); /** * sk_wait_data - wait for data to arrive at sk_receive_queue diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7592b6519953..79043bc3da39 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1738,6 +1738,24 @@ out: return copied ? : err; } +static void +tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, + size_t len_left, size_t decrypted, ssize_t done, + size_t *flushed_at) +{ + size_t max_rec; + + if (len_left <= decrypted) + return; + + max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE; + if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec) + return; + + *flushed_at = done; + sk_flush_backlog(sk); +} + int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -1750,6 +1768,7 @@ int tls_sw_recvmsg(struct sock *sk, struct sk_psock *psock; unsigned char control = 0; ssize_t decrypted = 0; + size_t flushed_at = 0; struct strp_msg *rxm; struct tls_msg *tlm; struct sk_buff *skb; @@ -1839,6 +1858,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err <= 0) goto recv_end; + /* periodically flush backlog, and feed strparser */ + tls_read_flush_backlog(sk, prot, len, to_decrypt, + decrypted + copied, &flushed_at); + ctx->recv_pkt = NULL; __strp_unpause(&ctx->strp); __skb_queue_tail(&ctx->rx_list, skb); -- cgit v1.2.3 From d810d367ec40a1031173a447bd0146cf48e98733 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 5 Jul 2022 19:35:15 +0800 Subject: net: page_pool: optimize page pool page allocation in NUMA scenario Currently NIC packet receiving performance based on page pool deteriorates occasionally. To analysis the causes of this problem page allocation stats are collected. Here are the stats when NIC rx performance deteriorates: bandwidth(Gbits/s) 16.8 6.91 rx_pp_alloc_fast 13794308 21141869 rx_pp_alloc_slow 108625 166481 rx_pp_alloc_slow_h 0 0 rx_pp_alloc_empty 8192 8192 rx_pp_alloc_refill 0 0 rx_pp_alloc_waive 100433 158289 rx_pp_recycle_cached 0 0 rx_pp_recycle_cache_full 0 0 rx_pp_recycle_ring 362400 420281 rx_pp_recycle_ring_full 6064893 9709724 rx_pp_recycle_released_ref 0 0 The rx_pp_alloc_waive count indicates that a large number of pages' numa node are inconsistent with the NIC device numa node. Therefore these pages can't be reused by the page pool. As a result, many new pages would be allocated by __page_pool_alloc_pages_slow which is time consuming. This causes the NIC rx performance fluctuations. The main reason of huge numa mismatch pages in page pool is that page pool uses alloc_pages_bulk_array to allocate original pages. This function is not suitable for page allocation in NUMA scenario. So this patch uses alloc_pages_bulk_array_node which has a NUMA id input parameter to ensure the NUMA consistent between NIC device and allocated pages. Repeated NIC rx performance tests are performed 40 times. NIC rx bandwidth is higher and more stable compared to the datas above. Here are three test stats, the rx_pp_alloc_waive count is zero and rx_pp_alloc_slow which indicates pages allocated from slow patch is relatively low. bandwidth(Gbits/s) 93 93.9 93.8 rx_pp_alloc_fast 60066264 61266386 60938254 rx_pp_alloc_slow 16512 16517 16539 rx_pp_alloc_slow_ho 0 0 0 rx_pp_alloc_empty 16512 16517 16539 rx_pp_alloc_refill 473841 481910 481585 rx_pp_alloc_waive 0 0 0 rx_pp_recycle_cached 0 0 0 rx_pp_recycle_cache_full 0 0 0 rx_pp_recycle_ring 29754145 30358243 30194023 rx_pp_recycle_ring_full 0 0 0 rx_pp_recycle_released_ref 0 0 0 Signed-off-by: Jie Wang Acked-by: Jesper Dangaard Brouer Acked-by: Ilias Apalodimas Link: https://lore.kernel.org/r/20220705113515.54342-1-huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f18e6e771993..b74905fcc3a1 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -389,7 +389,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache); + nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, + pool->alloc.cache); if (unlikely(!nr_pages)) return NULL; -- cgit v1.2.3 From c2dd4059dc31ee6f5b83c8d2064bb1f1f465bcec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jul 2022 19:18:46 +0000 Subject: net: minor optimization in __alloc_skb() TCP allocates 'fast clones' skbs for packets in tx queues. Currently, __alloc_skb() initializes the companion fclone field to SKB_FCLONE_CLONE, and leaves other fields untouched. It makes sense to defer this init much later in skb_clone(), because all fclone fields are copied and hot in cpu caches at that time. This removes one cache line miss in __alloc_skb(), cost seen on an host with 256 cpus all competing on memory accesses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c62e42d0c531..c4a751781581 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -454,8 +454,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); - - fclones->skb2.fclone = SKB_FCLONE_CLONE; } return skb; @@ -1513,6 +1511,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); + n->fclone = SKB_FCLONE_CLONE; } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; -- cgit v1.2.3 From 9974d37ea75f01b47d16072b5dad305bd8d23fcc Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 28 Jun 2022 20:36:16 +0800 Subject: skmsg: Fix invalid last sg check in sk_msg_recvmsg() In sk_psock_skb_ingress_enqueue function, if the linear area + nr_frags + frag_list of the SKB has NR_MSG_FRAG_IDS blocks in total, skb_to_sgvec will return NR_MSG_FRAG_IDS, then msg->sg.end will be set to NR_MSG_FRAG_IDS, and in addition, (NR_MSG_FRAG_IDS - 1) is set to the last SG of msg. Recv the msg in sk_msg_recvmsg, when i is (NR_MSG_FRAG_IDS - 1), the sk_msg_iter_var_next(i) will change i to 0 (not NR_MSG_FRAG_IDS), the judgment condition "msg_rx->sg.start==msg_rx->sg.end" and "i != msg_rx->sg.end" can not work. As a result, the processed msg cannot be deleted from ingress_msg list. But the length of all the sge of the msg has changed to 0. Then the next recvmsg syscall will process the msg repeatedly, because the length of sge is 0, the -EFAULT error is always returned. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220628123616.186950-1-liujian56@huawei.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 266d3b7b7d0b..81627892bdd4 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (i != msg_rx->sg.end); + } while (!sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + if (!sge->length && sg_is_last(sge)) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } -- cgit v1.2.3 From 868232f5cd382c37a5005deb7be0620c6f7bc08d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Jul 2022 01:14:02 -0700 Subject: devlink: Remove unused function devlink_rate_nodes_destroy The previous patch removed the last usage of the function devlink_rate_nodes_destroy(). Thus, remove this function from devlink API. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Paolo Abeni --- include/net/devlink.h | 1 - net/core/devlink.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 2a2a2a0c93f7..0e163cc87d45 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1571,7 +1571,6 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, bool external); int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); -void devlink_rate_nodes_destroy(struct devlink *devlink); void devlink_port_linecard_set(struct devlink_port *devlink_port, struct devlink_linecard *linecard); struct devlink_linecard * diff --git a/net/core/devlink.c b/net/core/devlink.c index db61f3a341cb..1588e2246234 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10095,24 +10095,6 @@ void devl_rate_nodes_destroy(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy); -/** - * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device - * - * @devlink: devlink instance - * - * Unset parent for all rate objects and destroy all rate nodes - * on specified device. - * - * Context: Takes and release devlink->lock . - */ -void devlink_rate_nodes_destroy(struct devlink *devlink) -{ - mutex_lock(&devlink->lock); - devl_rate_nodes_destroy(devlink); - mutex_unlock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); - /** * devlink_port_linecard_set - Link port with a linecard * -- cgit v1.2.3 From df539fc62b069ed2b2395d8d1c77f7758c5001a6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Jul 2022 01:14:05 -0700 Subject: devlink: Remove unused functions devlink_rate_leaf_create/destroy The previous patch removed the last usage of the functions devlink_rate_leaf_create() and devlink_rate_nodes_destroy(). Thus, remove these function from devlink API. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Paolo Abeni --- include/net/devlink.h | 2 -- net/core/devlink.c | 42 +++++++----------------------------------- 2 files changed, 7 insertions(+), 37 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0e163cc87d45..5150deb67fab 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1569,8 +1569,6 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); -int devlink_rate_leaf_create(struct devlink_port *port, void *priv); -void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); void devlink_port_linecard_set(struct devlink_port *devlink_port, struct devlink_linecard *linecard); struct devlink_linecard * diff --git a/net/core/devlink.c b/net/core/devlink.c index 1588e2246234..970e5c2a52bd 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10006,20 +10006,13 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) } EXPORT_SYMBOL_GPL(devl_rate_leaf_create); -int -devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) -{ - struct devlink *devlink = devlink_port->devlink; - int ret; - - mutex_lock(&devlink->lock); - ret = devl_rate_leaf_create(devlink_port, priv); - mutex_unlock(&devlink->lock); - - return ret; -} -EXPORT_SYMBOL_GPL(devlink_rate_leaf_create); - +/** + * devl_rate_leaf_destroy - destroy devlink rate leaf + * + * @devlink_port: devlink port linked to the rate object + * + * Destroy the devlink rate object of type leaf on provided @devlink_port. + */ void devl_rate_leaf_destroy(struct devlink_port *devlink_port) { struct devlink_rate *devlink_rate = devlink_port->devlink_rate; @@ -10037,27 +10030,6 @@ void devl_rate_leaf_destroy(struct devlink_port *devlink_port) } EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy); -/** - * devlink_rate_leaf_destroy - destroy devlink rate leaf - * - * @devlink_port: devlink port linked to the rate object - * - * Context: Takes and release devlink->lock . - */ -void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) -{ - struct devlink_rate *devlink_rate = devlink_port->devlink_rate; - struct devlink *devlink = devlink_port->devlink; - - if (!devlink_rate) - return; - - mutex_lock(&devlink->lock); - devl_rate_leaf_destroy(devlink_port); - mutex_unlock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); - /** * devl_rate_nodes_destroy - destroy all devlink rate nodes on device * @devlink: devlink instance -- cgit v1.2.3 From f0680ef0f9497f88b513dea1ae54664f0806ecfb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Jul 2022 01:14:08 -0700 Subject: devlink: Hold the instance lock in port_new / port_del callbacks Let the core take the devlink instance lock around port_new and port_del callbacks and remove the now redundant locking in the only driver that currently use them. Signed-off-by: Moshe Shemesh Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 4 ---- net/core/devlink.c | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'net/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 2068c22ff367..7d955a4d9f14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -355,9 +355,7 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - devl_lock(devlink); err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index); - devl_unlock(devlink); mlx5_sf_table_put(table); return err; } @@ -402,9 +400,7 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, goto sf_err; } - devl_lock(devlink); mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); - devl_unlock(devlink); mlx5_sf_id_erase(table, sf); mutex_lock(&table->sf_state_lock); diff --git a/net/core/devlink.c b/net/core/devlink.c index 970e5c2a52bd..e206cc90bec5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1712,7 +1712,7 @@ static int devlink_port_new_notifiy(struct devlink *devlink, if (!msg) return -ENOMEM; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); devlink_port = devlink_port_get_by_index(devlink, port_index); if (!devlink_port) { err = -ENODEV; @@ -1725,11 +1725,9 @@ static int devlink_port_new_notifiy(struct devlink *devlink, goto out; err = genlmsg_reply(msg, info); - mutex_unlock(&devlink->lock); return err; out: - mutex_unlock(&devlink->lock); nlmsg_free(msg); return err; } @@ -9067,13 +9065,11 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_NEW, .doit = devlink_nl_cmd_port_new_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_DEL, .doit = devlink_nl_cmd_port_del_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_LINECARD_GET, -- cgit v1.2.3 From 83d85bb069152b790caad905fa53e6d50cd3734d Mon Sep 17 00:00:00 2001 From: Maksym Glubokiy Date: Mon, 11 Jul 2022 18:09:07 +0300 Subject: net: extract port range fields from fl_flow_key So it can be used for port range filter offloading. Co-developed-by: Volodymyr Mytnyk Signed-off-by: Volodymyr Mytnyk Signed-off-by: Maksym Glubokiy Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 16 ++++++++++++++++ include/net/flow_offload.h | 6 ++++++ net/core/flow_offload.c | 7 +++++++ net/sched/cls_flower.c | 8 +------- 4 files changed, 30 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index a4c6057c7097..0f9544a9bb9e 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -178,6 +178,22 @@ struct flow_dissector_key_ports { }; }; +/** + * struct flow_dissector_key_ports_range + * @tp: port number from packet + * @tp_min: min port number in range + * @tp_max: max port number in range + */ +struct flow_dissector_key_ports_range { + union { + struct flow_dissector_key_ports tp; + struct { + struct flow_dissector_key_ports tp_min; + struct flow_dissector_key_ports tp_max; + }; + }; +}; + /** * flow_dissector_key_icmp: * type: ICMP type diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 7ac313858037..a8d8512b7059 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -48,6 +48,10 @@ struct flow_match_ports { struct flow_dissector_key_ports *key, *mask; }; +struct flow_match_ports_range { + struct flow_dissector_key_ports_range *key, *mask; +}; + struct flow_match_icmp { struct flow_dissector_key_icmp *key, *mask; }; @@ -94,6 +98,8 @@ void flow_rule_match_ip(const struct flow_rule *rule, struct flow_match_ip *out); void flow_rule_match_ports(const struct flow_rule *rule, struct flow_match_ports *out); +void flow_rule_match_ports_range(const struct flow_rule *rule, + struct flow_match_ports_range *out); void flow_rule_match_tcp(const struct flow_rule *rule, struct flow_match_tcp *out); void flow_rule_match_icmp(const struct flow_rule *rule, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 929f6379a279..0d3075d3c8fb 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -125,6 +125,13 @@ void flow_rule_match_ports(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_ports); +void flow_rule_match_ports_range(const struct flow_rule *rule, + struct flow_match_ports_range *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE, out); +} +EXPORT_SYMBOL(flow_rule_match_ports_range); + void flow_rule_match_tcp(const struct flow_rule *rule, struct flow_match_tcp *out) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index dcca70144dff..1a1e34480b7e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -63,13 +63,7 @@ struct fl_flow_key { struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; struct flow_dissector_key_enc_opts enc_opts; - union { - struct flow_dissector_key_ports tp; - struct { - struct flow_dissector_key_ports tp_min; - struct flow_dissector_key_ports tp_max; - }; - } tp_range; + struct flow_dissector_key_ports_range tp_range; struct flow_dissector_key_ct ct; struct flow_dissector_key_hash hash; struct flow_dissector_key_num_of_vlans num_of_vlans; -- cgit v1.2.3 From 1abfb265f0accc8635254cf1da03154ac7a5fb1b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Jul 2022 12:24:22 +0200 Subject: net: devlink: fix unlocked vs locked functions descriptions To be unified with the rest of the code, the unlocked version (devl_*) of function should have the same description in documentation as the locked one. Add the missing documentation. Also, add "Context" annotation for the locked versions where it is missing. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index e206cc90bec5..8cef65a6934d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9673,6 +9673,19 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) cancel_delayed_work_sync(&devlink_port->type_warn_dw); } +/** + * devl_port_register() - Register devlink port + * + * @devlink: devlink + * @devlink_port: devlink port + * @port_index: driver-specific numerical identifier of the port + * + * Register devlink port with provided port index. User can use + * any indexing, even hw-related one. devlink_port structure + * is convenient to be embedded inside user driver private structure. + * Note that the caller should take care of zeroing the devlink_port + * structure. + */ int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) @@ -9711,6 +9724,8 @@ EXPORT_SYMBOL_GPL(devl_port_register); * is convenient to be embedded inside user driver private structure. * Note that the caller should take care of zeroing the devlink_port * structure. + * + * Context: Takes and release devlink->lock . */ int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, @@ -9725,6 +9740,11 @@ int devlink_port_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_port_register); +/** + * devl_port_unregister() - Unregister devlink port + * + * @devlink_port: devlink port + */ void devl_port_unregister(struct devlink_port *devlink_port) { lockdep_assert_held(&devlink_port->devlink->lock); @@ -9742,6 +9762,8 @@ EXPORT_SYMBOL_GPL(devl_port_unregister); * devlink_port_unregister - Unregister devlink port * * @devlink_port: devlink port + * + * Context: Takes and release devlink->lock . */ void devlink_port_unregister(struct devlink_port *devlink_port) { -- cgit v1.2.3 From 7715023aa51f2a2d14d1ae785f2cb3cd72819d6f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Jul 2022 12:24:23 +0200 Subject: net: devlink: use helpers to work with devlink->lock mutex As far as the lock helpers exist as the drivers need to work with the devlink->lock mutex, use the helpers internally in devlink.c in order to be consistent. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 230 ++++++++++++++++++++++++++--------------------------- 1 file changed, 115 insertions(+), 115 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 8cef65a6934d..2b2e454ebd78 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -711,7 +711,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, return PTR_ERR(devlink); } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_lock(&devlink->lock); + devl_lock(devlink); info->user_ptr[0] = devlink; if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { devlink_port = devlink_port_get_from_info(devlink, info); @@ -754,7 +754,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); mutex_unlock(&devlink_mutex); return err; @@ -772,7 +772,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink_linecard_put(linecard); } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); mutex_unlock(&devlink_mutex); } @@ -1329,7 +1329,7 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; u32 id = NETLINK_CB(cb->skb).portid; @@ -1342,13 +1342,13 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, NULL); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -1495,7 +1495,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { idx++; @@ -1507,13 +1507,13 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -2450,7 +2450,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { idx++; @@ -2462,13 +2462,13 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -2603,7 +2603,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, !devlink->ops->sb_pool_get) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, @@ -2612,12 +2612,12 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -2824,7 +2824,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, !devlink->ops->sb_port_pool_get) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, @@ -2833,12 +2833,12 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -3073,7 +3073,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, !devlink->ops->sb_tc_pool_bind_get) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, devlink, @@ -3083,12 +3083,12 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -5159,7 +5159,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { idx++; @@ -5173,13 +5173,13 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -5394,7 +5394,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, &devlink_port->param_list, list) { @@ -5412,14 +5412,14 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -5671,7 +5671,7 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id) unsigned long count; void *p; - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) @@ -5707,7 +5707,7 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) unsigned long count; void *p; - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) @@ -5746,7 +5746,7 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) */ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); if (xa_load(&devlink->snapshot_ids, id)) return -EEXIST; @@ -5773,7 +5773,7 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) */ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1), xa_limit_32b, GFP_KERNEL); @@ -5801,7 +5801,7 @@ __devlink_region_snapshot_create(struct devlink_region *region, struct devlink_snapshot *snapshot; int err; - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); /* check if region can hold one more snapshot */ if (region->cur_snapshots == region->max_snapshots) @@ -5839,7 +5839,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region, { struct devlink *devlink = region->devlink; - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); region->cur_snapshots--; @@ -5933,7 +5933,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, struct devlink_port *port; int err = 0; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(region, &devlink->region_list, list) { if (*idx < start) { (*idx)++; @@ -5957,7 +5957,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, } out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } @@ -6247,7 +6247,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_dev; } - mutex_lock(&devlink->lock); + devl_lock(devlink); if (!attrs[DEVLINK_ATTR_REGION_NAME] || !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { @@ -6343,7 +6343,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); mutex_unlock(&devlink_mutex); @@ -6352,7 +6352,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); out_dev: mutex_unlock(&devlink_mutex); @@ -6515,12 +6515,12 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, if (idx < start || !devlink->ops->info_get) goto inc; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); if (err == -EOPNOTSUPP) err = 0; else if (err) { @@ -7722,7 +7722,7 @@ retry_rep: if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry_port; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); list_for_each_entry(reporter, &port->reporter_list, list) { @@ -7737,7 +7737,7 @@ retry_rep: cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { mutex_unlock(&port->reporters_lock); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } @@ -7745,7 +7745,7 @@ retry_rep: } mutex_unlock(&port->reporters_lock); } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry_port: devlink_put(devlink); } @@ -8292,7 +8292,7 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { idx++; @@ -8304,13 +8304,13 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -8519,7 +8519,7 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(group_item, &devlink->trap_group_list, list) { if (idx < start) { @@ -8532,13 +8532,13 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -8833,7 +8833,7 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { if (idx < start) { @@ -8846,13 +8846,13 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } @@ -9690,7 +9690,7 @@ int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) { - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); if (devlink_port_index_exists(devlink, port_index)) return -EEXIST; @@ -9733,9 +9733,9 @@ int devlink_port_register(struct devlink *devlink, { int err; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = devl_port_register(devlink, devlink_port, port_index); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_port_register); @@ -9769,9 +9769,9 @@ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; - mutex_lock(&devlink->lock); + devl_lock(devlink); devl_port_unregister(devlink_port); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_port_unregister); @@ -10380,7 +10380,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, struct devlink_sb *devlink_sb; int err = 0; - mutex_lock(&devlink->lock); + devl_lock(devlink); if (devlink_sb_index_exists(devlink, sb_index)) { err = -EEXIST; goto unlock; @@ -10399,7 +10399,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, devlink_sb->egress_tc_count = egress_tc_count; list_add_tail(&devlink_sb->list, &devlink->sb_list); unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_sb_register); @@ -10408,11 +10408,11 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) { struct devlink_sb *devlink_sb; - mutex_lock(&devlink->lock); + devl_lock(devlink); devlink_sb = devlink_sb_get_by_index(devlink, sb_index); WARN_ON(!devlink_sb); list_del(&devlink_sb->list); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); kfree(devlink_sb); } EXPORT_SYMBOL_GPL(devlink_sb_unregister); @@ -10428,9 +10428,9 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); int devlink_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers) { - mutex_lock(&devlink->lock); + devl_lock(devlink); devlink->dpipe_headers = dpipe_headers; - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return 0; } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); @@ -10444,9 +10444,9 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); */ void devlink_dpipe_headers_unregister(struct devlink *devlink) { - mutex_lock(&devlink->lock); + devl_lock(devlink); devlink->dpipe_headers = NULL; - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); @@ -10501,7 +10501,7 @@ int devlink_dpipe_table_register(struct devlink *devlink, if (WARN_ON(!table_ops->size_get)) return -EINVAL; - mutex_lock(&devlink->lock); + devl_lock(devlink); if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink)) { @@ -10522,7 +10522,7 @@ int devlink_dpipe_table_register(struct devlink *devlink, list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); @@ -10538,17 +10538,17 @@ void devlink_dpipe_table_unregister(struct devlink *devlink, { struct devlink_dpipe_table *table; - mutex_lock(&devlink->lock); + devl_lock(devlink); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink); if (!table) goto unlock; list_del_rcu(&table->list); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); kfree_rcu(table, rcu); return; unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); @@ -10580,7 +10580,7 @@ int devlink_resource_register(struct devlink *devlink, top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; - mutex_lock(&devlink->lock); + devl_lock(devlink); resource = devlink_resource_find(devlink, NULL, resource_id); if (resource) { err = -EINVAL; @@ -10620,7 +10620,7 @@ int devlink_resource_register(struct devlink *devlink, INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_resource_register); @@ -10647,7 +10647,7 @@ void devlink_resources_unregister(struct devlink *devlink) { struct devlink_resource *tmp, *child_resource; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, list) { @@ -10656,7 +10656,7 @@ void devlink_resources_unregister(struct devlink *devlink) kfree(child_resource); } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resources_unregister); @@ -10674,7 +10674,7 @@ int devlink_resource_size_get(struct devlink *devlink, struct devlink_resource *resource; int err = 0; - mutex_lock(&devlink->lock); + devl_lock(devlink); resource = devlink_resource_find(devlink, NULL, resource_id); if (!resource) { err = -EINVAL; @@ -10683,7 +10683,7 @@ int devlink_resource_size_get(struct devlink *devlink, *p_resource_size = resource->size_new; resource->size = resource->size_new; out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_resource_size_get); @@ -10703,7 +10703,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink, struct devlink_dpipe_table *table; int err = 0; - mutex_lock(&devlink->lock); + devl_lock(devlink); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink); if (!table) { @@ -10714,7 +10714,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink, table->resource_units = resource_units; table->resource_valid = true; out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); @@ -10734,7 +10734,7 @@ void devlink_resource_occ_get_register(struct devlink *devlink, { struct devlink_resource *resource; - mutex_lock(&devlink->lock); + devl_lock(devlink); resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) goto out; @@ -10743,7 +10743,7 @@ void devlink_resource_occ_get_register(struct devlink *devlink, resource->occ_get = occ_get; resource->occ_get_priv = occ_get_priv; out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register); @@ -10758,7 +10758,7 @@ void devlink_resource_occ_get_unregister(struct devlink *devlink, { struct devlink_resource *resource; - mutex_lock(&devlink->lock); + devl_lock(devlink); resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) goto out; @@ -10767,7 +10767,7 @@ void devlink_resource_occ_get_unregister(struct devlink *devlink, resource->occ_get = NULL; resource->occ_get_priv = NULL; out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); @@ -11006,7 +11006,7 @@ devlink_region_create(struct devlink *devlink, if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); - mutex_lock(&devlink->lock); + devl_lock(devlink); if (devlink_region_get_by_name(devlink, ops->name)) { err = -EEXIST; @@ -11027,11 +11027,11 @@ devlink_region_create(struct devlink *devlink, list_add_tail(®ion->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return region; unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(devlink_region_create); @@ -11056,7 +11056,7 @@ devlink_port_region_create(struct devlink_port *port, if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); - mutex_lock(&devlink->lock); + devl_lock(devlink); if (devlink_port_region_get_by_name(port, ops->name)) { err = -EEXIST; @@ -11078,11 +11078,11 @@ devlink_port_region_create(struct devlink_port *port, list_add_tail(®ion->list, &port->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return region; unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(devlink_port_region_create); @@ -11097,7 +11097,7 @@ void devlink_region_destroy(struct devlink_region *region) struct devlink *devlink = region->devlink; struct devlink_snapshot *snapshot, *ts; - mutex_lock(&devlink->lock); + devl_lock(devlink); /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, ®ion->snapshot_list, list) @@ -11106,7 +11106,7 @@ void devlink_region_destroy(struct devlink_region *region) list_del(®ion->list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); kfree(region); } EXPORT_SYMBOL_GPL(devlink_region_destroy); @@ -11130,9 +11130,9 @@ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { int err; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = __devlink_region_snapshot_id_get(devlink, id); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } @@ -11150,9 +11150,9 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); */ void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id) { - mutex_lock(&devlink->lock); + devl_lock(devlink); __devlink_snapshot_id_decrement(devlink, id); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); @@ -11174,9 +11174,9 @@ int devlink_region_snapshot_create(struct devlink_region *region, struct devlink *devlink = region->devlink; int err; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = __devlink_region_snapshot_create(region, data, snapshot_id); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } @@ -11559,7 +11559,7 @@ int devlink_traps_register(struct devlink *devlink, if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) return -EINVAL; - mutex_lock(&devlink->lock); + devl_lock(devlink); for (i = 0; i < traps_count; i++) { const struct devlink_trap *trap = &traps[i]; @@ -11571,7 +11571,7 @@ int devlink_traps_register(struct devlink *devlink, if (err) goto err_trap_register; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return 0; @@ -11579,7 +11579,7 @@ err_trap_register: err_trap_verify: for (i--; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_traps_register); @@ -11596,7 +11596,7 @@ void devlink_traps_unregister(struct devlink *devlink, { int i; - mutex_lock(&devlink->lock); + devl_lock(devlink); /* Make sure we do not have any packets in-flight while unregistering * traps by disabling all of them and waiting for a grace period. */ @@ -11605,7 +11605,7 @@ void devlink_traps_unregister(struct devlink *devlink, synchronize_rcu(); for (i = traps_count - 1; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_traps_unregister); @@ -11777,7 +11777,7 @@ int devlink_trap_groups_register(struct devlink *devlink, { int i, err; - mutex_lock(&devlink->lock); + devl_lock(devlink); for (i = 0; i < groups_count; i++) { const struct devlink_trap_group *group = &groups[i]; @@ -11789,7 +11789,7 @@ int devlink_trap_groups_register(struct devlink *devlink, if (err) goto err_trap_group_register; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return 0; @@ -11797,7 +11797,7 @@ err_trap_group_register: err_trap_group_verify: for (i--; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_groups_register); @@ -11814,10 +11814,10 @@ void devlink_trap_groups_unregister(struct devlink *devlink, { int i; - mutex_lock(&devlink->lock); + devl_lock(devlink); for (i = groups_count - 1; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister); @@ -11917,7 +11917,7 @@ devlink_trap_policers_register(struct devlink *devlink, { int i, err; - mutex_lock(&devlink->lock); + devl_lock(devlink); for (i = 0; i < policers_count; i++) { const struct devlink_trap_policer *policer = &policers[i]; @@ -11932,7 +11932,7 @@ devlink_trap_policers_register(struct devlink *devlink, if (err) goto err_trap_policer_register; } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return 0; @@ -11940,7 +11940,7 @@ err_trap_policer_register: err_trap_policer_verify: for (i--; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_policers_register); @@ -11958,10 +11958,10 @@ devlink_trap_policers_unregister(struct devlink *devlink, { int i; - mutex_lock(&devlink->lock); + devl_lock(devlink); for (i = policers_count - 1; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister); @@ -12015,9 +12015,9 @@ void devlink_compat_running_version(struct devlink *devlink, if (!devlink->ops->info_get) return; - mutex_lock(&devlink->lock); + devl_lock(devlink); __devlink_compat_running_version(devlink, buf, len); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) @@ -12032,11 +12032,11 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) if (ret) return ret; - mutex_lock(&devlink->lock); + devl_lock(devlink); devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, NULL); devlink_flash_update_end_notify(devlink); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); release_firmware(params.fw); -- cgit v1.2.3 From 96a233e600df351bcb06e3c20efe048855552926 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 14 Jul 2022 11:51:01 +0100 Subject: bpf: Add endian modifiers to fix endian warnings A couple of the syscalls which load values (bpf_skb_load_helper_16() and bpf_skb_load_helper_32()) are using u16/u32 types which are triggering warnings as they are then converted from big-endian to CPU-endian. Fix these by making the types __be instead. Fixes the following sparse warnings: net/core/filter.c:246:32: warning: cast to restricted __be16 net/core/filter.c:246:32: warning: cast to restricted __be16 net/core/filter.c:246:32: warning: cast to restricted __be16 net/core/filter.c:246:32: warning: cast to restricted __be16 net/core/filter.c:273:32: warning: cast to restricted __be32 net/core/filter.c:273:32: warning: cast to restricted __be32 net/core/filter.c:273:32: warning: cast to restricted __be32 net/core/filter.c:273:32: warning: cast to restricted __be32 net/core/filter.c:273:32: warning: cast to restricted __be32 net/core/filter.c:273:32: warning: cast to restricted __be32 Signed-off-by: Ben Dooks Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220714105101.297304-1-ben.dooks@sifive.com --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 4ef77ec5255e..289614887ed5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -237,7 +237,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u16 tmp, *ptr; + __be16 tmp, *ptr; const int len = sizeof(tmp); if (offset >= 0) { @@ -264,7 +264,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u32 tmp, *ptr; + __be32 tmp, *ptr; const int len = sizeof(tmp); if (likely(offset >= 0)) { -- cgit v1.2.3 From 9a7923668bc779dd601d94704b4dd895a2ef6a77 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Jul 2022 16:18:51 +0200 Subject: net: devlink: make devlink_dpipe_headers_register() return void The return value is not used, so change the return value type to void. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 6 ++---- include/net/devlink.h | 2 +- net/core/devlink.c | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 5d494fabf93d..c2540292702d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1266,10 +1266,8 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int err; - err = devlink_dpipe_headers_register(devlink, - &mlxsw_sp_dpipe_headers); - if (err) - return err; + devlink_dpipe_headers_register(devlink, &mlxsw_sp_dpipe_headers); + err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); if (err) goto err_erif_table_init; diff --git a/include/net/devlink.h b/include/net/devlink.h index b1b5c19a8316..88c701b375a2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1590,7 +1590,7 @@ int devlink_dpipe_table_register(struct devlink *devlink, void *priv, bool counter_control_extern); void devlink_dpipe_table_unregister(struct devlink *devlink, const char *table_name); -int devlink_dpipe_headers_register(struct devlink *devlink, +void devlink_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers); void devlink_dpipe_headers_unregister(struct devlink *devlink); bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, diff --git a/net/core/devlink.c b/net/core/devlink.c index 2b2e454ebd78..c261bba9ab76 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10425,13 +10425,12 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); * * Register the headers supported by hardware. */ -int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers *dpipe_headers) +void devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) { devl_lock(devlink); devlink->dpipe_headers = dpipe_headers; devl_unlock(devlink); - return 0; } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); -- cgit v1.2.3 From ced92571af24b66c2b5b32a489c144b0b45c93a3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Jul 2022 16:18:52 +0200 Subject: net: devlink: fix a typo in function name devlink_port_new_notifiy() Fix the typo in a name of devlink_port_new_notifiy() function. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index c261bba9ab76..2f22ce33c3ec 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1700,9 +1700,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); } -static int devlink_port_new_notifiy(struct devlink *devlink, - unsigned int port_index, - struct genl_info *info) +static int devlink_port_new_notify(struct devlink *devlink, + unsigned int port_index, + struct genl_info *info) { struct devlink_port *devlink_port; struct sk_buff *msg; @@ -1775,7 +1775,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, if (err) return err; - err = devlink_port_new_notifiy(devlink, new_port_index, info); + err = devlink_port_new_notify(devlink, new_port_index, info); if (err && err != -ENODEV) { /* Fail to send the response; destroy newly created port. */ devlink->ops->port_del(devlink, new_port_index, extack); -- cgit v1.2.3 From a44c4511ffb2c7387fce6061d20febf75f5399bb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Jul 2022 16:18:53 +0200 Subject: net: devlink: fix return statement in devlink_port_new_notify() Return directly without intermediate value store at the end of devlink_port_new_notify() function. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 2f22ce33c3ec..a9776ea923ae 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1724,8 +1724,7 @@ static int devlink_port_new_notify(struct devlink *devlink, if (err) goto out; - err = genlmsg_reply(msg, info); - return err; + return genlmsg_reply(msg, info); out: nlmsg_free(msg); -- cgit v1.2.3 From e26fde2f5befad0951fe6345403616bf51e901be Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 16 Jul 2022 13:02:33 +0200 Subject: net: devlink: avoid false DEADLOCK warning reported by lockdep Add a lock_class_key per devlink instance to avoid DEADLOCK warning by lockdep, while locking more than one devlink instance in driver code, for example in opening VFs flow. Kernel log: [ 101.433802] ============================================ [ 101.433803] WARNING: possible recursive locking detected [ 101.433810] 5.19.0-rc1+ #35 Not tainted [ 101.433812] -------------------------------------------- [ 101.433813] bash/892 is trying to acquire lock: [ 101.433815] ffff888127bfc2f8 (&devlink->lock){+.+.}-{3:3}, at: probe_one+0x3c/0x690 [mlx5_core] [ 101.433909] but task is already holding lock: [ 101.433910] ffff888118f4c2f8 (&devlink->lock){+.+.}-{3:3}, at: mlx5_core_sriov_configure+0x62/0x280 [mlx5_core] [ 101.433989] other info that might help us debug this: [ 101.433990] Possible unsafe locking scenario: [ 101.433991] CPU0 [ 101.433991] ---- [ 101.433992] lock(&devlink->lock); [ 101.433993] lock(&devlink->lock); [ 101.433995] *** DEADLOCK *** [ 101.433996] May be due to missing lock nesting notation [ 101.433996] 6 locks held by bash/892: [ 101.433998] #0: ffff88810eb50448 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0xf3/0x1d0 [ 101.434009] #1: ffff888114777c88 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x20d/0x520 [ 101.434017] #2: ffff888102b58660 (kn->active#231){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x230/0x520 [ 101.434023] #3: ffff888102d70198 (&dev->mutex){....}-{3:3}, at: sriov_numvfs_store+0x132/0x310 [ 101.434031] #4: ffff888118f4c2f8 (&devlink->lock){+.+.}-{3:3}, at: mlx5_core_sriov_configure+0x62/0x280 [mlx5_core] [ 101.434108] #5: ffff88812adce198 (&dev->mutex){....}-{3:3}, at: __device_attach+0x76/0x430 [ 101.434116] stack backtrace: [ 101.434118] CPU: 5 PID: 892 Comm: bash Not tainted 5.19.0-rc1+ #35 [ 101.434120] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 101.434130] Call Trace: [ 101.434133] [ 101.434135] dump_stack_lvl+0x57/0x7d [ 101.434145] __lock_acquire.cold+0x1df/0x3e7 [ 101.434151] ? register_lock_class+0x1880/0x1880 [ 101.434157] lock_acquire+0x1c1/0x550 [ 101.434160] ? probe_one+0x3c/0x690 [mlx5_core] [ 101.434229] ? lockdep_hardirqs_on_prepare+0x400/0x400 [ 101.434232] ? __xa_alloc+0x1ed/0x2d0 [ 101.434236] ? ksys_write+0xf3/0x1d0 [ 101.434239] __mutex_lock+0x12c/0x14b0 [ 101.434243] ? probe_one+0x3c/0x690 [mlx5_core] [ 101.434312] ? probe_one+0x3c/0x690 [mlx5_core] [ 101.434380] ? devlink_alloc_ns+0x11b/0x910 [ 101.434385] ? mutex_lock_io_nested+0x1320/0x1320 [ 101.434388] ? lockdep_init_map_type+0x21a/0x7d0 [ 101.434391] ? lockdep_init_map_type+0x21a/0x7d0 [ 101.434393] ? __init_swait_queue_head+0x70/0xd0 [ 101.434397] probe_one+0x3c/0x690 [mlx5_core] [ 101.434467] pci_device_probe+0x1b4/0x480 [ 101.434471] really_probe+0x1e0/0xaa0 [ 101.434474] __driver_probe_device+0x219/0x480 [ 101.434478] driver_probe_device+0x49/0x130 [ 101.434481] __device_attach_driver+0x1b8/0x280 [ 101.434484] ? driver_allows_async_probing+0x140/0x140 [ 101.434487] bus_for_each_drv+0x123/0x1a0 [ 101.434489] ? bus_for_each_dev+0x1a0/0x1a0 [ 101.434491] ? lockdep_hardirqs_on_prepare+0x286/0x400 [ 101.434494] ? trace_hardirqs_on+0x2d/0x100 [ 101.434498] __device_attach+0x1a3/0x430 [ 101.434501] ? device_driver_attach+0x1e0/0x1e0 [ 101.434503] ? pci_bridge_d3_possible+0x1e0/0x1e0 [ 101.434506] ? pci_create_resource_files+0xeb/0x190 [ 101.434511] pci_bus_add_device+0x6c/0xa0 [ 101.434514] pci_iov_add_virtfn+0x9e4/0xe00 [ 101.434517] ? trace_hardirqs_on+0x2d/0x100 [ 101.434521] sriov_enable+0x64a/0xca0 [ 101.434524] ? pcibios_sriov_disable+0x10/0x10 [ 101.434528] mlx5_core_sriov_configure+0xab/0x280 [mlx5_core] [ 101.434602] sriov_numvfs_store+0x20a/0x310 [ 101.434605] ? sriov_totalvfs_show+0xc0/0xc0 [ 101.434608] ? sysfs_file_ops+0x170/0x170 [ 101.434611] ? sysfs_file_ops+0x117/0x170 [ 101.434614] ? sysfs_file_ops+0x170/0x170 [ 101.434616] kernfs_fop_write_iter+0x348/0x520 [ 101.434619] new_sync_write+0x2e5/0x520 [ 101.434621] ? new_sync_read+0x520/0x520 [ 101.434624] ? lock_acquire+0x1c1/0x550 [ 101.434626] ? lockdep_hardirqs_on_prepare+0x400/0x400 [ 101.434630] vfs_write+0x5cb/0x8d0 [ 101.434633] ksys_write+0xf3/0x1d0 [ 101.434635] ? __x64_sys_read+0xb0/0xb0 [ 101.434638] ? lockdep_hardirqs_on_prepare+0x286/0x400 [ 101.434640] ? syscall_enter_from_user_mode+0x1d/0x50 [ 101.434643] do_syscall_64+0x3d/0x90 [ 101.434647] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 101.434650] RIP: 0033:0x7f5ff536b2f7 [ 101.434658] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [ 101.434661] RSP: 002b:00007ffd9ea85d58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 101.434664] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f5ff536b2f7 [ 101.434666] RDX: 0000000000000002 RSI: 000055c4c279e230 RDI: 0000000000000001 [ 101.434668] RBP: 000055c4c279e230 R08: 000000000000000a R09: 0000000000000001 [ 101.434669] R10: 000055c4c283cbf0 R11: 0000000000000246 R12: 0000000000000002 [ 101.434670] R13: 00007f5ff543d500 R14: 0000000000000002 R15: 00007f5ff543d700 [ 101.434673] Signed-off-by: Moshe Shemesh Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index a9776ea923ae..d2a4e6ee1be6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -66,6 +66,7 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex lock; + struct lock_class_key lock_key; u8 reload_failed:1; refcount_t refcount; struct completion comp; @@ -9472,7 +9473,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_list); INIT_LIST_HEAD(&devlink->trap_group_list); INIT_LIST_HEAD(&devlink->trap_policer_list); + lockdep_register_key(&devlink->lock_key); mutex_init(&devlink->lock); + lockdep_set_class(&devlink->lock, &devlink->lock_key); mutex_init(&devlink->reporters_lock); mutex_init(&devlink->linecards_lock); refcount_set(&devlink->refcount, 1); @@ -9619,6 +9622,7 @@ void devlink_free(struct devlink *devlink) mutex_destroy(&devlink->linecards_lock); mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); + lockdep_unregister_key(&devlink->lock_key); WARN_ON(!list_empty(&devlink->trap_policer_list)); WARN_ON(!list_empty(&devlink->trap_group_list)); WARN_ON(!list_empty(&devlink->trap_list)); -- cgit v1.2.3 From 852e85a704c2e11c050bdea286bc438aba4f4a22 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:34 +0200 Subject: net: devlink: add unlocked variants of devling_trap*() functions Add unlocked variants of devl_trap*() functions to be used in drivers called-in with devlink->lock held. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 20 ++++++ net/core/devlink.c | 180 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 168 insertions(+), 32 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 88c701b375a2..fb1e17d998b6 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1745,9 +1745,15 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink, const char *component, unsigned long timeout); +int devl_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv); int devlink_traps_register(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count, void *priv); +void devl_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count); void devlink_traps_unregister(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count); @@ -1755,17 +1761,31 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, void *trap_ctx, struct devlink_port *in_devlink_port, const struct flow_action_cookie *fa_cookie); void *devlink_trap_ctx_priv(void *trap_ctx); +int devl_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count); int devlink_trap_groups_register(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count); +void devl_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count); void devlink_trap_groups_unregister(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count); int +devl_trap_policers_register(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count); +int devlink_trap_policers_register(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count); void +devl_trap_policers_unregister(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count); +void devlink_trap_policers_unregister(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count); diff --git a/net/core/devlink.c b/net/core/devlink.c index d2a4e6ee1be6..b0f6e8388880 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -11544,7 +11544,7 @@ static void devlink_trap_disable(struct devlink *devlink, } /** - * devlink_traps_register - Register packet traps with devlink. + * devl_traps_register - Register packet traps with devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. @@ -11552,16 +11552,16 @@ static void devlink_trap_disable(struct devlink *devlink, * * Return: Non-zero value on failure. */ -int devlink_traps_register(struct devlink *devlink, - const struct devlink_trap *traps, - size_t traps_count, void *priv) +int devl_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) { int i, err; if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) return -EINVAL; - devl_lock(devlink); + devl_assert_locked(devlink); for (i = 0; i < traps_count; i++) { const struct devlink_trap *trap = &traps[i]; @@ -11573,7 +11573,6 @@ int devlink_traps_register(struct devlink *devlink, if (err) goto err_trap_register; } - devl_unlock(devlink); return 0; @@ -11581,24 +11580,47 @@ err_trap_register: err_trap_verify: for (i--; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); + return err; +} +EXPORT_SYMBOL_GPL(devl_traps_register); + +/** + * devlink_traps_register - Register packet traps with devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * @priv: Driver private information. + * + * Context: Takes and release devlink->lock . + * + * Return: Non-zero value on failure. + */ +int devlink_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) +{ + int err; + + devl_lock(devlink); + err = devl_traps_register(devlink, traps, traps_count, priv); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_traps_register); /** - * devlink_traps_unregister - Unregister packet traps from devlink. + * devl_traps_unregister - Unregister packet traps from devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. */ -void devlink_traps_unregister(struct devlink *devlink, - const struct devlink_trap *traps, - size_t traps_count) +void devl_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) { int i; - devl_lock(devlink); + devl_assert_locked(devlink); /* Make sure we do not have any packets in-flight while unregistering * traps by disabling all of them and waiting for a grace period. */ @@ -11607,6 +11629,23 @@ void devlink_traps_unregister(struct devlink *devlink, synchronize_rcu(); for (i = traps_count - 1; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); +} +EXPORT_SYMBOL_GPL(devl_traps_unregister); + +/** + * devlink_traps_unregister - Unregister packet traps from devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * + * Context: Takes and release devlink->lock . + */ +void devlink_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) +{ + devl_lock(devlink); + devl_traps_unregister(devlink, traps, traps_count); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_traps_unregister); @@ -11766,20 +11805,20 @@ devlink_trap_group_unregister(struct devlink *devlink, } /** - * devlink_trap_groups_register - Register packet trap groups with devlink. + * devl_trap_groups_register - Register packet trap groups with devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. * * Return: Non-zero value on failure. */ -int devlink_trap_groups_register(struct devlink *devlink, - const struct devlink_trap_group *groups, - size_t groups_count) +int devl_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) { int i, err; - devl_lock(devlink); + devl_assert_locked(devlink); for (i = 0; i < groups_count; i++) { const struct devlink_trap_group *group = &groups[i]; @@ -11791,7 +11830,6 @@ int devlink_trap_groups_register(struct devlink *devlink, if (err) goto err_trap_group_register; } - devl_unlock(devlink); return 0; @@ -11799,26 +11837,65 @@ err_trap_group_register: err_trap_group_verify: for (i--; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); + return err; +} +EXPORT_SYMBOL_GPL(devl_trap_groups_register); + +/** + * devlink_trap_groups_register - Register packet trap groups with devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + * + * Context: Takes and release devlink->lock . + * + * Return: Non-zero value on failure. + */ +int devlink_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + int err; + + devl_lock(devlink); + err = devl_trap_groups_register(devlink, groups, groups_count); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_groups_register); +/** + * devl_trap_groups_unregister - Unregister packet trap groups from devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + */ +void devl_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + int i; + + devl_assert_locked(devlink); + for (i = groups_count - 1; i >= 0; i--) + devlink_trap_group_unregister(devlink, &groups[i]); +} +EXPORT_SYMBOL_GPL(devl_trap_groups_unregister); + /** * devlink_trap_groups_unregister - Unregister packet trap groups from devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. + * + * Context: Takes and release devlink->lock . */ void devlink_trap_groups_unregister(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count) { - int i; - devl_lock(devlink); - for (i = groups_count - 1; i >= 0; i--) - devlink_trap_group_unregister(devlink, &groups[i]); + devl_trap_groups_unregister(devlink, groups, groups_count); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister); @@ -11905,7 +11982,7 @@ devlink_trap_policer_unregister(struct devlink *devlink, } /** - * devlink_trap_policers_register - Register packet trap policers with devlink. + * devl_trap_policers_register - Register packet trap policers with devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers. @@ -11913,13 +11990,13 @@ devlink_trap_policer_unregister(struct devlink *devlink, * Return: Non-zero value on failure. */ int -devlink_trap_policers_register(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count) +devl_trap_policers_register(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count) { int i, err; - devl_lock(devlink); + devl_assert_locked(devlink); for (i = 0; i < policers_count; i++) { const struct devlink_trap_policer *policer = &policers[i]; @@ -11934,35 +12011,74 @@ devlink_trap_policers_register(struct devlink *devlink, if (err) goto err_trap_policer_register; } - devl_unlock(devlink); - return 0; err_trap_policer_register: err_trap_policer_verify: for (i--; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); + return err; +} +EXPORT_SYMBOL_GPL(devl_trap_policers_register); + +/** + * devlink_trap_policers_register - Register packet trap policers with devlink. + * @devlink: devlink. + * @policers: Packet trap policers. + * @policers_count: Count of provided packet trap policers. + * + * Return: Non-zero value on failure. + * + * Context: Takes and release devlink->lock . + */ +int +devlink_trap_policers_register(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count) +{ + int err; + + devl_lock(devlink); + err = devl_trap_policers_register(devlink, policers, policers_count); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_policers_register); +/** + * devl_trap_policers_unregister - Unregister packet trap policers from devlink. + * @devlink: devlink. + * @policers: Packet trap policers. + * @policers_count: Count of provided packet trap policers. + */ +void +devl_trap_policers_unregister(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count) +{ + int i; + + devl_assert_locked(devlink); + for (i = policers_count - 1; i >= 0; i--) + devlink_trap_policer_unregister(devlink, &policers[i]); +} +EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); + /** * devlink_trap_policers_unregister - Unregister packet trap policers from devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers. + * + * Context: Takes and release devlink->lock . */ void devlink_trap_policers_unregister(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count) { - int i; - devl_lock(devlink); - for (i = policers_count - 1; i >= 0; i--) - devlink_trap_policer_unregister(devlink, &policers[i]); + devl_trap_policers_unregister(devlink, policers, policers_count); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister); -- cgit v1.2.3 From c223d6a4bf6d2a88f2418316393f9170f81c54f0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:35 +0200 Subject: net: devlink: add unlocked variants of devlink_resource*() functions Add unlocked variants of devlink_resource*() functions to be used in drivers called-in with devlink->lock held. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 17 ++++ net/core/devlink.c | 217 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 173 insertions(+), 61 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index fb1e17d998b6..d341753753ce 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1608,23 +1608,40 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; +int devl_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params); int devlink_resource_register(struct devlink *devlink, const char *resource_name, u64 resource_size, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); +void devl_resources_unregister(struct devlink *devlink); void devlink_resources_unregister(struct devlink *devlink); +int devl_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size); int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); int devlink_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); +void devl_resource_occ_get_register(struct devlink *devlink, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv); void devlink_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv); +void devl_resource_occ_get_unregister(struct devlink *devlink, + u64 resource_id); + void devlink_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id); int devlink_params_register(struct devlink *devlink, diff --git a/net/core/devlink.c b/net/core/devlink.c index b0f6e8388880..1688271ef7b2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10555,45 +10555,41 @@ unlock: EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); /** - * devlink_resource_register - devlink resource register + * devl_resource_register - devlink resource register * - * @devlink: devlink - * @resource_name: resource's name - * @resource_size: resource's size - * @resource_id: resource's id - * @parent_resource_id: resource's parent id - * @size_params: size parameters + * @devlink: devlink + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @size_params: size parameters * - * Generic resources should reuse the same names across drivers. - * Please see the generic resources list at: - * Documentation/networking/devlink/devlink-resource.rst + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst */ -int devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) +int devl_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params) { struct devlink_resource *resource; struct list_head *resource_list; bool top_hierarchy; - int err = 0; + + lockdep_assert_held(&devlink->lock); top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; - devl_lock(devlink); resource = devlink_resource_find(devlink, NULL, resource_id); - if (resource) { - err = -EINVAL; - goto out; - } + if (resource) + return -EINVAL; resource = kzalloc(sizeof(*resource), GFP_KERNEL); - if (!resource) { - err = -ENOMEM; - goto out; - } + if (!resource) + return -ENOMEM; if (top_hierarchy) { resource_list = &devlink->resource_list; @@ -10607,8 +10603,7 @@ int devlink_resource_register(struct devlink *devlink, resource->parent = parent_resource; } else { kfree(resource); - err = -EINVAL; - goto out; + return -EINVAL; } } @@ -10621,7 +10616,39 @@ int devlink_resource_register(struct devlink *devlink, sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); -out: + + return 0; +} +EXPORT_SYMBOL_GPL(devl_resource_register); + +/** + * devlink_resource_register - devlink resource register + * + * @devlink: devlink + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @size_params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Context: Takes and release devlink->lock . + */ +int devlink_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params) +{ + int err; + + devl_lock(devlink); + err = devl_resource_register(devlink, resource_name, resource_size, + resource_id, parent_resource_id, size_params); devl_unlock(devlink); return err; } @@ -10641,15 +10668,15 @@ static void devlink_resource_unregister(struct devlink *devlink, } /** - * devlink_resources_unregister - free all resources + * devl_resources_unregister - free all resources * - * @devlink: devlink + * @devlink: devlink */ -void devlink_resources_unregister(struct devlink *devlink) +void devl_resources_unregister(struct devlink *devlink) { struct devlink_resource *tmp, *child_resource; - devl_lock(devlink); + lockdep_assert_held(&devlink->lock); list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, list) { @@ -10657,34 +10684,65 @@ void devlink_resources_unregister(struct devlink *devlink) list_del(&child_resource->list); kfree(child_resource); } +} +EXPORT_SYMBOL_GPL(devl_resources_unregister); +/** + * devlink_resources_unregister - free all resources + * + * @devlink: devlink + * + * Context: Takes and release devlink->lock . + */ +void devlink_resources_unregister(struct devlink *devlink) +{ + devl_lock(devlink); + devl_resources_unregister(devlink); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resources_unregister); +/** + * devl_resource_size_get - get and update size + * + * @devlink: devlink + * @resource_id: the requested resource id + * @p_resource_size: ptr to update + */ +int devl_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size) +{ + struct devlink_resource *resource; + + lockdep_assert_held(&devlink->lock); + + resource = devlink_resource_find(devlink, NULL, resource_id); + if (!resource) + return -EINVAL; + *p_resource_size = resource->size_new; + resource->size = resource->size_new; + return 0; +} +EXPORT_SYMBOL_GPL(devl_resource_size_get); + /** * devlink_resource_size_get - get and update size * * @devlink: devlink * @resource_id: the requested resource id * @p_resource_size: ptr to update + * + * Context: Takes and release devlink->lock . */ int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size) { - struct devlink_resource *resource; - int err = 0; + int err; devl_lock(devlink); - resource = devlink_resource_find(devlink, NULL, resource_id); - if (!resource) { - err = -EINVAL; - goto out; - } - *p_resource_size = resource->size_new; - resource->size = resource->size_new; -out: + err = devl_resource_size_get(devlink, resource_id, p_resource_size); devl_unlock(devlink); return err; } @@ -10721,6 +10779,33 @@ out: } EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); +/** + * devl_resource_occ_get_register - register occupancy getter + * + * @devlink: devlink + * @resource_id: resource id + * @occ_get: occupancy getter callback + * @occ_get_priv: occupancy getter callback priv + */ +void devl_resource_occ_get_register(struct devlink *devlink, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv) +{ + struct devlink_resource *resource; + + lockdep_assert_held(&devlink->lock); + + resource = devlink_resource_find(devlink, NULL, resource_id); + if (WARN_ON(!resource)) + return; + WARN_ON(resource->occ_get); + + resource->occ_get = occ_get; + resource->occ_get_priv = occ_get_priv; +} +EXPORT_SYMBOL_GPL(devl_resource_occ_get_register); + /** * devlink_resource_occ_get_register - register occupancy getter * @@ -10728,47 +10813,57 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); * @resource_id: resource id * @occ_get: occupancy getter callback * @occ_get_priv: occupancy getter callback priv + * + * Context: Takes and release devlink->lock . */ void devlink_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv) { - struct devlink_resource *resource; - devl_lock(devlink); - resource = devlink_resource_find(devlink, NULL, resource_id); - if (WARN_ON(!resource)) - goto out; - WARN_ON(resource->occ_get); - - resource->occ_get = occ_get; - resource->occ_get_priv = occ_get_priv; -out: + devl_resource_occ_get_register(devlink, resource_id, + occ_get, occ_get_priv); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register); /** - * devlink_resource_occ_get_unregister - unregister occupancy getter + * devl_resource_occ_get_unregister - unregister occupancy getter * - * @devlink: devlink - * @resource_id: resource id + * @devlink: devlink + * @resource_id: resource id */ -void devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) +void devl_resource_occ_get_unregister(struct devlink *devlink, + u64 resource_id) { struct devlink_resource *resource; - devl_lock(devlink); + lockdep_assert_held(&devlink->lock); + resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) - goto out; + return; WARN_ON(!resource->occ_get); resource->occ_get = NULL; resource->occ_get_priv = NULL; -out: +} +EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister); + +/** + * devlink_resource_occ_get_unregister - unregister occupancy getter + * + * @devlink: devlink + * @resource_id: resource id + * + * Context: Takes and release devlink->lock . + */ +void devlink_resource_occ_get_unregister(struct devlink *devlink, + u64 resource_id) +{ + devl_lock(devlink); + devl_resource_occ_get_unregister(devlink, resource_id); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); -- cgit v1.2.3 From 755cfa69c4ece770c5a15dd51a9da2a7aafafa7c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:36 +0200 Subject: net: devlink: add unlocked variants of devlink_sb*() functions Add unlocked variants of devlink_sb*() functions to be used in drivers called-in with devlink->lock held. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 5 +++++ net/core/devlink.c | 54 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 41 insertions(+), 18 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index d341753753ce..0057809a13b0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1579,10 +1579,15 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); +int devl_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count); +void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, diff --git a/net/core/devlink.c b/net/core/devlink.c index 1688271ef7b2..64dab4024d11 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10375,25 +10375,21 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); -int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, - u32 size, u16 ingress_pools_count, - u16 egress_pools_count, u16 ingress_tc_count, - u16 egress_tc_count) +int devl_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) { struct devlink_sb *devlink_sb; - int err = 0; - devl_lock(devlink); - if (devlink_sb_index_exists(devlink, sb_index)) { - err = -EEXIST; - goto unlock; - } + lockdep_assert_held(&devlink->lock); + + if (devlink_sb_index_exists(devlink, sb_index)) + return -EEXIST; devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); - if (!devlink_sb) { - err = -ENOMEM; - goto unlock; - } + if (!devlink_sb) + return -ENOMEM; devlink_sb->index = sb_index; devlink_sb->size = size; devlink_sb->ingress_pools_count = ingress_pools_count; @@ -10401,23 +10397,45 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, devlink_sb->ingress_tc_count = ingress_tc_count; devlink_sb->egress_tc_count = egress_tc_count; list_add_tail(&devlink_sb->list, &devlink->sb_list); -unlock: + return 0; +} +EXPORT_SYMBOL_GPL(devl_sb_register); + +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) +{ + int err; + + devl_lock(devlink); + err = devl_sb_register(devlink, sb_index, size, ingress_pools_count, + egress_pools_count, ingress_tc_count, + egress_tc_count); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_sb_register); -void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index) { struct devlink_sb *devlink_sb; - devl_lock(devlink); + lockdep_assert_held(&devlink->lock); + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); WARN_ON(!devlink_sb); list_del(&devlink_sb->list); - devl_unlock(devlink); kfree(devlink_sb); } +EXPORT_SYMBOL_GPL(devl_sb_unregister); + +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +{ + devl_lock(devlink); + devl_sb_unregister(devlink, sb_index); + devl_unlock(devlink); +} EXPORT_SYMBOL_GPL(devlink_sb_unregister); /** -- cgit v1.2.3 From 70a2ff89369d17c55efacb789171c3bd05c21817 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:37 +0200 Subject: net: devlink: add unlocked variants of devlink_dpipe*() functions Add unlocked variants of devlink_dpipe*() functions to be used in drivers called-in with devlink->lock held. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 12 ++++ net/core/devlink.c | 181 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 147 insertions(+), 46 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0057809a13b0..18ad88527847 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1589,14 +1589,23 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u16 egress_tc_count); void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); +int devl_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, bool counter_control_extern); int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); +void devl_dpipe_table_unregister(struct devlink *devlink, + const char *table_name); void devlink_dpipe_table_unregister(struct devlink *devlink, const char *table_name); +void devl_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers); void devlink_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers); +void devl_dpipe_headers_unregister(struct devlink *devlink); void devlink_dpipe_headers_unregister(struct devlink *devlink); bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, const char *table_name); @@ -1633,6 +1642,9 @@ int devl_resource_size_get(struct devlink *devlink, int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); +int devl_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units); int devlink_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); diff --git a/net/core/devlink.c b/net/core/devlink.c index 64dab4024d11..b249c18a8bbc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10438,6 +10438,23 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) } EXPORT_SYMBOL_GPL(devlink_sb_unregister); +/** + * devl_dpipe_headers_register - register dpipe headers + * + * @devlink: devlink + * @dpipe_headers: dpipe header array + * + * Register the headers supported by hardware. + */ +void devl_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) +{ + lockdep_assert_held(&devlink->lock); + + devlink->dpipe_headers = dpipe_headers; +} +EXPORT_SYMBOL_GPL(devl_dpipe_headers_register); + /** * devlink_dpipe_headers_register - register dpipe headers * @@ -10445,27 +10462,46 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); * @dpipe_headers: dpipe header array * * Register the headers supported by hardware. + * + * Context: Takes and release devlink->lock . */ void devlink_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers) { devl_lock(devlink); - devlink->dpipe_headers = dpipe_headers; + devl_dpipe_headers_register(devlink, dpipe_headers); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); +/** + * devl_dpipe_headers_unregister - unregister dpipe headers + * + * @devlink: devlink + * + * Unregister the headers supported by hardware. + */ +void devl_dpipe_headers_unregister(struct devlink *devlink) +{ + lockdep_assert_held(&devlink->lock); + + devlink->dpipe_headers = NULL; +} +EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister); + /** * devlink_dpipe_headers_unregister - unregister dpipe headers * * @devlink: devlink * * Unregister the headers supported by hardware. + * + * Context: Takes and release devlink->lock . */ void devlink_dpipe_headers_unregister(struct devlink *devlink) { devl_lock(devlink); - devlink->dpipe_headers = NULL; + devl_dpipe_headers_unregister(devlink); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); @@ -10502,38 +10538,33 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); /** - * devlink_dpipe_table_register - register dpipe table + * devl_dpipe_table_register - register dpipe table * - * @devlink: devlink - * @table_name: table name - * @table_ops: table ops - * @priv: priv - * @counter_control_extern: external control for counters + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @counter_control_extern: external control for counters */ -int devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) +int devl_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; - int err = 0; + + lockdep_assert_held(&devlink->lock); if (WARN_ON(!table_ops->size_get)) return -EINVAL; - devl_lock(devlink); - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, - devlink)) { - err = -EEXIST; - goto unlock; - } + devlink)) + return -EEXIST; table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) { - err = -ENOMEM; - goto unlock; - } + if (!table) + return -ENOMEM; table->name = table_name; table->table_ops = table_ops; @@ -10541,33 +10572,72 @@ int devlink_dpipe_table_register(struct devlink *devlink, table->counter_control_extern = counter_control_extern; list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); -unlock: + + return 0; +} +EXPORT_SYMBOL_GPL(devl_dpipe_table_register); + +/** + * devlink_dpipe_table_register - register dpipe table + * + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @counter_control_extern: external control for counters + * + * Context: Takes and release devlink->lock . + */ +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, bool counter_control_extern) +{ + int err; + + devl_lock(devlink); + err = devl_dpipe_table_register(devlink, table_name, table_ops, priv, + counter_control_extern); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); /** - * devlink_dpipe_table_unregister - unregister dpipe table + * devl_dpipe_table_unregister - unregister dpipe table * - * @devlink: devlink - * @table_name: table name + * @devlink: devlink + * @table_name: table name */ -void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) +void devl_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) { struct devlink_dpipe_table *table; - devl_lock(devlink); + lockdep_assert_held(&devlink->lock); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink); if (!table) - goto unlock; + return; list_del_rcu(&table->list); - devl_unlock(devlink); kfree_rcu(table, rcu); - return; -unlock: +} +EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister); + +/** + * devlink_dpipe_table_unregister - unregister dpipe table + * + * @devlink: devlink + * @table_name: table name + * + * Context: Takes and release devlink->lock . + */ +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ + devl_lock(devlink); + devl_dpipe_table_unregister(devlink, table_name); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); @@ -10766,6 +10836,32 @@ int devlink_resource_size_get(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_resource_size_get); +/** + * devl_dpipe_table_resource_set - set the resource id + * + * @devlink: devlink + * @table_name: table name + * @resource_id: resource id + * @resource_units: number of resource's units consumed per table's entry + */ +int devl_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units) +{ + struct devlink_dpipe_table *table; + + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name, devlink); + if (!table) + return -EINVAL; + + table->resource_id = resource_id; + table->resource_units = resource_units; + table->resource_valid = true; + return 0; +} +EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set); + /** * devlink_dpipe_table_resource_set - set the resource id * @@ -10773,25 +10869,18 @@ EXPORT_SYMBOL_GPL(devlink_resource_size_get); * @table_name: table name * @resource_id: resource id * @resource_units: number of resource's units consumed per table's entry + * + * Context: Takes and release devlink->lock . */ int devlink_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units) { - struct devlink_dpipe_table *table; - int err = 0; + int err; devl_lock(devlink); - table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name, devlink); - if (!table) { - err = -EINVAL; - goto out; - } - table->resource_id = resource_id; - table->resource_units = resource_units; - table->resource_valid = true; -out: + err = devl_dpipe_table_resource_set(devlink, table_name, + resource_id, resource_units); devl_unlock(devlink); return err; } -- cgit v1.2.3 From eb0e9fa2c6355e744d3ea7d07d34d89a4735320e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:39 +0200 Subject: net: devlink: add unlocked variants of devlink_region_create/destroy() functions Add unlocked variants of devlink_region_create/destroy() functions to be used in drivers called-in with devlink->lock held. Signed-off-by: Jiri Pirko Reviewed-by: Moshe Shemesh Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 5 +++ net/core/devlink.c | 89 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 66 insertions(+), 28 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 18ad88527847..391d401ddb55 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1676,6 +1676,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +struct devlink_region *devl_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, + u64 region_size); struct devlink_region * devlink_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, @@ -1684,6 +1688,7 @@ struct devlink_region * devlink_port_region_create(struct devlink_port *port, const struct devlink_port_region_ops *ops, u32 region_max_snapshots, u64 region_size); +void devl_region_destroy(struct devlink_region *region); void devlink_region_destroy(struct devlink_region *region); void devlink_port_region_destroy(struct devlink_region *region); diff --git a/net/core/devlink.c b/net/core/devlink.c index b249c18a8bbc..fe9657d6162f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -11192,36 +11192,31 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** - * devlink_region_create - create a new address region + * devl_region_create - create a new address region * - * @devlink: devlink - * @ops: region operations and name - * @region_max_snapshots: Maximum supported number of snapshots for region - * @region_size: size of region + * @devlink: devlink + * @ops: region operations and name + * @region_max_snapshots: Maximum supported number of snapshots for region + * @region_size: size of region */ -struct devlink_region * -devlink_region_create(struct devlink *devlink, - const struct devlink_region_ops *ops, - u32 region_max_snapshots, u64 region_size) +struct devlink_region *devl_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, + u64 region_size) { struct devlink_region *region; - int err = 0; + + devl_assert_locked(devlink); if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); - devl_lock(devlink); - - if (devlink_region_get_by_name(devlink, ops->name)) { - err = -EEXIST; - goto unlock; - } + if (devlink_region_get_by_name(devlink, ops->name)) + return ERR_PTR(-EEXIST); region = kzalloc(sizeof(*region), GFP_KERNEL); - if (!region) { - err = -ENOMEM; - goto unlock; - } + if (!region) + return ERR_PTR(-ENOMEM); region->devlink = devlink; region->max_snapshots = region_max_snapshots; @@ -11231,12 +11226,32 @@ devlink_region_create(struct devlink *devlink, list_add_tail(®ion->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - devl_unlock(devlink); return region; +} +EXPORT_SYMBOL_GPL(devl_region_create); -unlock: +/** + * devlink_region_create - create a new address region + * + * @devlink: devlink + * @ops: region operations and name + * @region_max_snapshots: Maximum supported number of snapshots for region + * @region_size: size of region + * + * Context: Takes and release devlink->lock . + */ +struct devlink_region * +devlink_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + struct devlink_region *region; + + devl_lock(devlink); + region = devl_region_create(devlink, ops, region_max_snapshots, + region_size); devl_unlock(devlink); - return ERR_PTR(err); + return region; } EXPORT_SYMBOL_GPL(devlink_region_create); @@ -11247,6 +11262,8 @@ EXPORT_SYMBOL_GPL(devlink_region_create); * @ops: region operations and name * @region_max_snapshots: Maximum supported number of snapshots for region * @region_size: size of region + * + * Context: Takes and release devlink->lock . */ struct devlink_region * devlink_port_region_create(struct devlink_port *port, @@ -11292,16 +11309,16 @@ unlock: EXPORT_SYMBOL_GPL(devlink_port_region_create); /** - * devlink_region_destroy - destroy address region + * devl_region_destroy - destroy address region * - * @region: devlink region to destroy + * @region: devlink region to destroy */ -void devlink_region_destroy(struct devlink_region *region) +void devl_region_destroy(struct devlink_region *region) { struct devlink *devlink = region->devlink; struct devlink_snapshot *snapshot, *ts; - devl_lock(devlink); + devl_assert_locked(devlink); /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, ®ion->snapshot_list, list) @@ -11310,9 +11327,25 @@ void devlink_region_destroy(struct devlink_region *region) list_del(®ion->list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); - devl_unlock(devlink); kfree(region); } +EXPORT_SYMBOL_GPL(devl_region_destroy); + +/** + * devlink_region_destroy - destroy address region + * + * @region: devlink region to destroy + * + * Context: Takes and release devlink->lock . + */ +void devlink_region_destroy(struct devlink_region *region) +{ + struct devlink *devlink = region->devlink; + + devl_lock(devlink); + devl_region_destroy(region); + devl_unlock(devlink); +} EXPORT_SYMBOL_GPL(devlink_region_destroy); /** -- cgit v1.2.3 From 012ec02ae4410207f796a9b280a60b80b6cc790a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:40 +0200 Subject: netdevsim: convert driver to use unlocked devlink API during init/fini Prepare for devlink reload being called with devlink->lock held and convert the netdevsim driver to use unlocked devlink API during init and fini flows. Take devl_lock() in reload_down() and reload_up() ops in the meantime before reload cmd is converted to take the lock itself. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 19 ------ drivers/net/netdevsim/dev.c | 134 ++++++++++++++++++-------------------- drivers/net/netdevsim/fib.c | 62 +++++++++--------- drivers/net/netdevsim/netdevsim.h | 3 - include/net/devlink.h | 1 + net/core/devlink.c | 6 ++ 6 files changed, 102 insertions(+), 123 deletions(-) (limited to 'net/core') diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 25cb2e600d53..b5f4df1a07a3 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -72,16 +72,7 @@ new_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) - return -EBUSY; - - if (nsim_bus_dev->in_reload) { - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); - return -EBUSY; - } - ret = nsim_drv_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -102,16 +93,7 @@ del_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) - return -EBUSY; - - if (nsim_bus_dev->in_reload) { - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); - return -EBUSY; - } - ret = nsim_drv_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -298,7 +280,6 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queu nsim_bus_dev->num_queues = num_queues; nsim_bus_dev->initial_net = current->nsproxy->net_ns; nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; - mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 57a3ac893792..5802e80e8fe1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -436,62 +436,62 @@ static int nsim_dev_resources_register(struct devlink *devlink) int err; /* Resources for IPv4 */ - err = devlink_resource_register(devlink, "IPv4", (u64)-1, - NSIM_RESOURCE_IPV4, - DEVLINK_RESOURCE_ID_PARENT_TOP, - ¶ms); + err = devl_resource_register(devlink, "IPv4", (u64)-1, + NSIM_RESOURCE_IPV4, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); if (err) { pr_err("Failed to register IPv4 top resource\n"); goto out; } - err = devlink_resource_register(devlink, "fib", (u64)-1, - NSIM_RESOURCE_IPV4_FIB, - NSIM_RESOURCE_IPV4, ¶ms); + err = devl_resource_register(devlink, "fib", (u64)-1, + NSIM_RESOURCE_IPV4_FIB, + NSIM_RESOURCE_IPV4, ¶ms); if (err) { pr_err("Failed to register IPv4 FIB resource\n"); return err; } - err = devlink_resource_register(devlink, "fib-rules", (u64)-1, - NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV4, ¶ms); + err = devl_resource_register(devlink, "fib-rules", (u64)-1, + NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV4, ¶ms); if (err) { pr_err("Failed to register IPv4 FIB rules resource\n"); return err; } /* Resources for IPv6 */ - err = devlink_resource_register(devlink, "IPv6", (u64)-1, - NSIM_RESOURCE_IPV6, - DEVLINK_RESOURCE_ID_PARENT_TOP, - ¶ms); + err = devl_resource_register(devlink, "IPv6", (u64)-1, + NSIM_RESOURCE_IPV6, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); if (err) { pr_err("Failed to register IPv6 top resource\n"); goto out; } - err = devlink_resource_register(devlink, "fib", (u64)-1, - NSIM_RESOURCE_IPV6_FIB, - NSIM_RESOURCE_IPV6, ¶ms); + err = devl_resource_register(devlink, "fib", (u64)-1, + NSIM_RESOURCE_IPV6_FIB, + NSIM_RESOURCE_IPV6, ¶ms); if (err) { pr_err("Failed to register IPv6 FIB resource\n"); return err; } - err = devlink_resource_register(devlink, "fib-rules", (u64)-1, - NSIM_RESOURCE_IPV6_FIB_RULES, - NSIM_RESOURCE_IPV6, ¶ms); + err = devl_resource_register(devlink, "fib-rules", (u64)-1, + NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_IPV6, ¶ms); if (err) { pr_err("Failed to register IPv6 FIB rules resource\n"); return err; } /* Resources for nexthops */ - err = devlink_resource_register(devlink, "nexthops", (u64)-1, - NSIM_RESOURCE_NEXTHOPS, - DEVLINK_RESOURCE_ID_PARENT_TOP, - ¶ms); + err = devl_resource_register(devlink, "nexthops", (u64)-1, + NSIM_RESOURCE_NEXTHOPS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); out: return err; @@ -557,15 +557,15 @@ static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, struct devlink *devlink) { nsim_dev->dummy_region = - devlink_region_create(devlink, &dummy_region_ops, - NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, - NSIM_DEV_DUMMY_REGION_SIZE); + devl_region_create(devlink, &dummy_region_ops, + NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, + NSIM_DEV_DUMMY_REGION_SIZE); return PTR_ERR_OR_ZERO(nsim_dev->dummy_region); } static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) { - devlink_region_destroy(nsim_dev->dummy_region); + devl_region_destroy(nsim_dev->dummy_region); } static int @@ -832,7 +832,11 @@ static void nsim_dev_trap_report_work(struct work_struct *work) /* For each running port and enabled packet trap, generate a UDP * packet with a random 5-tuple and report it. */ - devl_lock(priv_to_devlink(nsim_dev)); + if (!devl_trylock(priv_to_devlink(nsim_dev))) { + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0); + return; + } + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; @@ -880,18 +884,18 @@ static int nsim_dev_traps_init(struct devlink *devlink) nsim_trap_data->nsim_dev = nsim_dev; nsim_dev->trap_data = nsim_trap_data; - err = devlink_trap_policers_register(devlink, nsim_trap_policers_arr, - policers_count); + err = devl_trap_policers_register(devlink, nsim_trap_policers_arr, + policers_count); if (err) goto err_trap_policers_cnt_free; - err = devlink_trap_groups_register(devlink, nsim_trap_groups_arr, - ARRAY_SIZE(nsim_trap_groups_arr)); + err = devl_trap_groups_register(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); if (err) goto err_trap_policers_unregister; - err = devlink_traps_register(devlink, nsim_traps_arr, - ARRAY_SIZE(nsim_traps_arr), NULL); + err = devl_traps_register(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr), NULL); if (err) goto err_trap_groups_unregister; @@ -903,11 +907,11 @@ static int nsim_dev_traps_init(struct devlink *devlink) return 0; err_trap_groups_unregister: - devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr, - ARRAY_SIZE(nsim_trap_groups_arr)); + devl_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); err_trap_policers_unregister: - devlink_trap_policers_unregister(devlink, nsim_trap_policers_arr, - ARRAY_SIZE(nsim_trap_policers_arr)); + devl_trap_policers_unregister(devlink, nsim_trap_policers_arr, + ARRAY_SIZE(nsim_trap_policers_arr)); err_trap_policers_cnt_free: kfree(nsim_trap_data->trap_policers_cnt_arr); err_trap_items_free: @@ -923,12 +927,12 @@ static void nsim_dev_traps_exit(struct devlink *devlink) /* caution, trap work takes devlink lock */ cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); - devlink_traps_unregister(devlink, nsim_traps_arr, - ARRAY_SIZE(nsim_traps_arr)); - devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr, - ARRAY_SIZE(nsim_trap_groups_arr)); - devlink_trap_policers_unregister(devlink, nsim_trap_policers_arr, - ARRAY_SIZE(nsim_trap_policers_arr)); + devl_traps_unregister(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr)); + devl_trap_groups_unregister(devlink, nsim_trap_groups_arr, + ARRAY_SIZE(nsim_trap_groups_arr)); + devl_trap_policers_unregister(devlink, nsim_trap_policers_arr, + ARRAY_SIZE(nsim_trap_policers_arr)); kfree(nsim_dev->trap_data->trap_policers_cnt_arr); kfree(nsim_dev->trap_data->trap_items_arr); kfree(nsim_dev->trap_data); @@ -943,24 +947,19 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); - struct nsim_bus_dev *nsim_bus_dev; - - nsim_bus_dev = nsim_dev->nsim_bus_dev; - if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) - return -EOPNOTSUPP; + devl_lock(devlink); if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + devl_unlock(devlink); return -EOPNOTSUPP; } - nsim_bus_dev->in_reload = true; nsim_dev_reload_destroy(nsim_dev); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + devl_unlock(devlink); return 0; } @@ -969,25 +968,21 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); - struct nsim_bus_dev *nsim_bus_dev; int ret; - nsim_bus_dev = nsim_dev->nsim_bus_dev; - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - nsim_bus_dev->in_reload = false; - + devl_lock(devlink); if (nsim_dev->fail_reload) { /* For testing purposes, user set debugfs fail_reload * value to true. Fail right away. */ NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + devl_unlock(devlink); return -EINVAL; } *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); ret = nsim_dev_reload_create(nsim_dev, extack); - mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + devl_unlock(devlink); return ret; } @@ -1434,11 +1429,9 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) { struct nsim_dev_port *nsim_dev_port, *tmp; - devl_lock(priv_to_devlink(nsim_dev)); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) __nsim_dev_port_del(nsim_dev_port); - devl_unlock(priv_to_devlink(nsim_dev)); } static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, @@ -1447,9 +1440,7 @@ static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, int i, err; for (i = 0; i < port_count; i++) { - devl_lock(priv_to_devlink(nsim_dev)); err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i); - devl_unlock(priv_to_devlink(nsim_dev)); if (err) goto err_port_del_all; } @@ -1537,6 +1528,7 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_bus_dev->initial_net, &nsim_bus_dev->dev); if (!devlink) return -ENOMEM; + devl_lock(devlink); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); @@ -1555,7 +1547,7 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) GFP_KERNEL | __GFP_NOWARN); if (!nsim_dev->vfconfigs) { err = -ENOMEM; - goto err_devlink_free; + goto err_devlink_unlock; } err = nsim_dev_resources_register(devlink); @@ -1608,6 +1600,7 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; devlink_set_features(devlink, DEVLINK_F_RELOAD); + devl_unlock(devlink); devlink_register(devlink); return 0; @@ -1631,10 +1624,11 @@ err_params_unregister: devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: - devlink_resources_unregister(devlink); + devl_resources_unregister(devlink); err_vfc_free: kfree(nsim_dev->vfconfigs); -err_devlink_free: +err_devlink_unlock: + devl_unlock(devlink); devlink_free(devlink); dev_set_drvdata(&nsim_bus_dev->dev, NULL); return err; @@ -1648,13 +1642,11 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) return; debugfs_remove(nsim_dev->take_snapshot); - devl_lock(devlink); if (nsim_dev_get_vfs(nsim_dev)) { nsim_bus_dev_set_vfs(nsim_dev->nsim_bus_dev, 0); if (nsim_esw_mode_is_switchdev(nsim_dev)) nsim_esw_legacy_enable(nsim_dev, NULL); } - devl_unlock(devlink); nsim_dev_port_del_all(nsim_dev); nsim_dev_hwstats_exit(nsim_dev); @@ -1671,14 +1663,16 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) struct devlink *devlink = priv_to_devlink(nsim_dev); devlink_unregister(devlink); + devl_lock(devlink); nsim_dev_reload_destroy(nsim_dev); nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); - devlink_resources_unregister(devlink); + devl_resources_unregister(devlink); kfree(nsim_dev->vfconfigs); + devl_unlock(devlink); devlink_free(devlink); dev_set_drvdata(&nsim_bus_dev->dev, NULL); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index c8f398f5bc5b..94e7512bef94 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -1453,7 +1453,7 @@ static void nsim_fib_set_max_all(struct nsim_fib_data *data, int err; u64 val; - err = devlink_resource_size_get(devlink, res_ids[i], &val); + err = devl_resource_size_get(devlink, res_ids[i], &val); if (err) val = (u64) -1; nsim_fib_set_max(data, res_ids[i], val); @@ -1562,26 +1562,26 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, goto err_nexthop_nb_unregister; } - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB, - nsim_fib_ipv4_resource_occ_get, - data); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES, - nsim_fib_ipv4_rules_res_occ_get, - data); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB, - nsim_fib_ipv6_resource_occ_get, - data); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES, - nsim_fib_ipv6_rules_res_occ_get, - data); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_NEXTHOPS, - nsim_fib_nexthops_res_occ_get, - data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB, + nsim_fib_ipv4_resource_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES, + nsim_fib_ipv4_rules_res_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB, + nsim_fib_ipv6_resource_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES, + nsim_fib_ipv6_rules_res_occ_get, + data); + devl_resource_occ_get_register(devlink, + NSIM_RESOURCE_NEXTHOPS, + nsim_fib_nexthops_res_occ_get, + data); return data; err_nexthop_nb_unregister: @@ -1604,16 +1604,16 @@ err_data_free: void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) { - devlink_resource_occ_get_unregister(devlink, - NSIM_RESOURCE_NEXTHOPS); - devlink_resource_occ_get_unregister(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES); - devlink_resource_occ_get_unregister(devlink, - NSIM_RESOURCE_IPV6_FIB); - devlink_resource_occ_get_unregister(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES); - devlink_resource_occ_get_unregister(devlink, - NSIM_RESOURCE_IPV4_FIB); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_NEXTHOPS); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB_RULES); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV6_FIB); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB_RULES); + devl_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); flush_work(&data->fib_event_work); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 0b122872b2c9..7d8ed8d8df5c 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -376,9 +376,6 @@ struct nsim_bus_dev { */ unsigned int max_vfs; unsigned int num_vfs; - /* Lock for devlink->reload_enabled in netdevsim module */ - struct mutex nsim_bus_reload_lock; - bool in_reload; bool init; }; diff --git a/include/net/devlink.h b/include/net/devlink.h index 391d401ddb55..242798967a44 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1517,6 +1517,7 @@ struct device *devlink_to_dev(const struct devlink *devlink); /* Devlink instance explicit locking */ void devl_lock(struct devlink *devlink); +int devl_trylock(struct devlink *devlink); void devl_unlock(struct devlink *devlink); void devl_assert_locked(struct devlink *devlink); bool devl_lock_is_held(struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index fe9657d6162f..1c75de6f6388 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -266,6 +266,12 @@ void devl_lock(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_lock); +int devl_trylock(struct devlink *devlink) +{ + return mutex_trylock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_trylock); + void devl_unlock(struct devlink *devlink) { mutex_unlock(&devlink->lock); -- cgit v1.2.3 From f655dacb59ac987a56b82d2e73d85de411eb02aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Jul 2022 13:02:41 +0200 Subject: net: devlink: remove unused locked functions Remove locked versions of functions that are no longer used by anyone. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 20 ------ net/core/devlink.c | 168 -------------------------------------------------- 2 files changed, 188 deletions(-) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 242798967a44..780744b550b8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1594,20 +1594,11 @@ int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); -int devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern); void devl_dpipe_table_unregister(struct devlink *devlink, const char *table_name); -void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name); void devl_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers); -void devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers *dpipe_headers); void devl_dpipe_headers_unregister(struct devlink *devlink); -void devlink_dpipe_headers_unregister(struct devlink *devlink); bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, const char *table_name); int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); @@ -1640,9 +1631,6 @@ void devlink_resources_unregister(struct devlink *devlink); int devl_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); -int devlink_resource_size_get(struct devlink *devlink, - u64 resource_id, - u64 *p_resource_size); int devl_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); @@ -1817,18 +1805,10 @@ int devl_trap_policers_register(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count); -int -devlink_trap_policers_register(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count); void devl_trap_policers_unregister(struct devlink *devlink, const struct devlink_trap_policer *policers, size_t policers_count); -void -devlink_trap_policers_unregister(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count); #if IS_ENABLED(CONFIG_NET_DEVLINK) diff --git a/net/core/devlink.c b/net/core/devlink.c index 1c75de6f6388..98d79feeb3dc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10461,25 +10461,6 @@ void devl_dpipe_headers_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_dpipe_headers_register); -/** - * devlink_dpipe_headers_register - register dpipe headers - * - * @devlink: devlink - * @dpipe_headers: dpipe header array - * - * Register the headers supported by hardware. - * - * Context: Takes and release devlink->lock . - */ -void devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers *dpipe_headers) -{ - devl_lock(devlink); - devl_dpipe_headers_register(devlink, dpipe_headers); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); - /** * devl_dpipe_headers_unregister - unregister dpipe headers * @@ -10495,23 +10476,6 @@ void devl_dpipe_headers_unregister(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister); -/** - * devlink_dpipe_headers_unregister - unregister dpipe headers - * - * @devlink: devlink - * - * Unregister the headers supported by hardware. - * - * Context: Takes and release devlink->lock . - */ -void devlink_dpipe_headers_unregister(struct devlink *devlink) -{ - devl_lock(devlink); - devl_dpipe_headers_unregister(devlink); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); - /** * devlink_dpipe_table_counter_enabled - check if counter allocation * required @@ -10583,32 +10547,6 @@ int devl_dpipe_table_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_dpipe_table_register); -/** - * devlink_dpipe_table_register - register dpipe table - * - * @devlink: devlink - * @table_name: table name - * @table_ops: table ops - * @priv: priv - * @counter_control_extern: external control for counters - * - * Context: Takes and release devlink->lock . - */ -int devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) -{ - int err; - - devl_lock(devlink); - err = devl_dpipe_table_register(devlink, table_name, table_ops, priv, - counter_control_extern); - devl_unlock(devlink); - return err; -} -EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); - /** * devl_dpipe_table_unregister - unregister dpipe table * @@ -10631,23 +10569,6 @@ void devl_dpipe_table_unregister(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister); -/** - * devlink_dpipe_table_unregister - unregister dpipe table - * - * @devlink: devlink - * @table_name: table name - * - * Context: Takes and release devlink->lock . - */ -void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) -{ - devl_lock(devlink); - devl_dpipe_table_unregister(devlink, table_name); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); - /** * devl_resource_register - devlink resource register * @@ -10820,28 +10741,6 @@ int devl_resource_size_get(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_resource_size_get); -/** - * devlink_resource_size_get - get and update size - * - * @devlink: devlink - * @resource_id: the requested resource id - * @p_resource_size: ptr to update - * - * Context: Takes and release devlink->lock . - */ -int devlink_resource_size_get(struct devlink *devlink, - u64 resource_id, - u64 *p_resource_size) -{ - int err; - - devl_lock(devlink); - err = devl_resource_size_get(devlink, resource_id, p_resource_size); - devl_unlock(devlink); - return err; -} -EXPORT_SYMBOL_GPL(devlink_resource_size_get); - /** * devl_dpipe_table_resource_set - set the resource id * @@ -10868,30 +10767,6 @@ int devl_dpipe_table_resource_set(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set); -/** - * devlink_dpipe_table_resource_set - set the resource id - * - * @devlink: devlink - * @table_name: table name - * @resource_id: resource id - * @resource_units: number of resource's units consumed per table's entry - * - * Context: Takes and release devlink->lock . - */ -int devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) -{ - int err; - - devl_lock(devlink); - err = devl_dpipe_table_resource_set(devlink, table_name, - resource_id, resource_units); - devl_unlock(devlink); - return err; -} -EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); - /** * devl_resource_occ_get_register - register occupancy getter * @@ -12262,30 +12137,6 @@ err_trap_policer_verify: } EXPORT_SYMBOL_GPL(devl_trap_policers_register); -/** - * devlink_trap_policers_register - Register packet trap policers with devlink. - * @devlink: devlink. - * @policers: Packet trap policers. - * @policers_count: Count of provided packet trap policers. - * - * Return: Non-zero value on failure. - * - * Context: Takes and release devlink->lock . - */ -int -devlink_trap_policers_register(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count) -{ - int err; - - devl_lock(devlink); - err = devl_trap_policers_register(devlink, policers, policers_count); - devl_unlock(devlink); - return err; -} -EXPORT_SYMBOL_GPL(devlink_trap_policers_register); - /** * devl_trap_policers_unregister - Unregister packet trap policers from devlink. * @devlink: devlink. @@ -12305,25 +12156,6 @@ devl_trap_policers_unregister(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); -/** - * devlink_trap_policers_unregister - Unregister packet trap policers from devlink. - * @devlink: devlink. - * @policers: Packet trap policers. - * @policers_count: Count of provided packet trap policers. - * - * Context: Takes and release devlink->lock . - */ -void -devlink_trap_policers_unregister(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count) -{ - devl_lock(devlink); - devl_trap_policers_unregister(devlink, policers, policers_count); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister); - static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { -- cgit v1.2.3 From fd1894224407c484f652ad456e1ce423e89bb3eb Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 15 Jul 2022 19:55:59 +0800 Subject: bpf: Don't redirect packets with invalid pkt_len Syzbot found an issue [1]: fq_codel_drop() try to drop a flow whitout any skbs, that is, the flow->head is null. The root cause, as the [2] says, is because that bpf_prog_test_run_skb() run a bpf prog which redirects empty skbs. So we should determine whether the length of the packet modified by bpf prog or others like bpf_prog_test is valid before forwarding it directly. LINK: [1] https://syzkaller.appspot.com/bug?id=0b84da80c2917757915afa89f7738a9d16ec96c5 LINK: [2] https://www.spinics.net/lists/netdev/msg777503.html Reported-by: syzbot+7a12909485b94426aceb@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220715115559.139691-1-shaozhengchao@huawei.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 8 ++++++++ net/bpf/test_run.c | 3 +++ net/core/dev.c | 1 + 3 files changed, 12 insertions(+) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f6a27ab19202..82e8368ba6e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2459,6 +2459,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} + /* * Add data to an sk_buff */ diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2ca96acbc50a..dc9dc0bedca0 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -955,6 +955,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + if (!skb->len) + return -EINVAL; + if (!__skb) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 978ed0622d8f..e241a475036f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) bool again = false; skb_reset_mac_header(skb); + skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); -- cgit v1.2.3 From b8fff748521c7178b9a7d32b5a34a81cec8396f3 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:32:34 +0200 Subject: bpf: Set flow flag to allow any source IP in bpf_tunnel_key Commit 26101f5ab6bd ("bpf: Add source ip in "struct bpf_tunnel_key"") added support for getting and setting the outer source IP of encapsulated packets via the bpf_skb_{get,set}_tunnel_key BPF helper. This change allows BPF programs to set any IP address as the source, including for example the IP address of a container running on the same host. In that last case, however, the encapsulated packets are dropped when looking up the route because the source IP address isn't assigned to any interface on the host. To avoid this, we need to set the FLOWI_FLAG_ANYSRC flag. Fixes: 26101f5ab6bd ("bpf: Add source ip in "struct bpf_tunnel_key"") Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/76873d384e21288abe5767551a0799ac93ec07fb.1658759380.git.paul@isovalent.com --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 57c5e4c4efd2..dffc7dcda96a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4653,6 +4653,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); + info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; -- cgit v1.2.3 From bbd52178e249fe893ef4a9b87cde5b6c473b0a7c Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 22 Jul 2022 15:01:05 -0700 Subject: bpf: Fix bpf_xdp_pointer return pointer For the case where offset + len == size, bpf_xdp_pointer should return a valid pointer to the addr because that access is permitted. We should only return NULL in the case where offset + len exceeds size. Fixes: 3f364222d032 ("net: xdp: introduce bpf_xdp_pointer utility routine") Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/bpf/20220722220105.2065466-1-joannelkoong@gmail.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index dffc7dcda96a..5669248aff25 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3918,7 +3918,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) offset -= frag_size; } out: - return offset + len < size ? addr + offset : NULL; + return offset + len <= size ? addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, -- cgit v1.2.3 From 46126db9c86110e5fc1e369b9bb89735ddefdae4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Mon, 18 Jul 2022 14:18:10 +0200 Subject: flow_dissector: Add PPPoE dissectors Allow to dissect PPPoE specific fields which are: - session ID (16 bits) - ppp protocol (16 bits) - type (16 bits) - this is PPPoE ethertype, for now only ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC in the future The goal is to make the following TC command possible: # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \ flower \ pppoe_sid 12 \ ppp_proto ip \ action drop Note that only PPPoE Session is supported. Signed-off-by: Wojciech Drewek Acked-by: Guillaume Nault Signed-off-by: Tony Nguyen --- include/linux/ppp_defs.h | 14 ++++++++++++ include/net/flow_dissector.h | 13 +++++++++++ net/core/flow_dissector.c | 53 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h index 9d2b388fae1a..b7e57fdbd413 100644 --- a/include/linux/ppp_defs.h +++ b/include/linux/ppp_defs.h @@ -11,4 +11,18 @@ #include #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c) + +/** + * ppp_proto_is_valid - checks if PPP protocol is valid + * @proto: PPP protocol + * + * Assumes proto is not compressed. + * Protocol is valid if the value is odd and the least significant bit of the + * most significant octet is 0 (see RFC 1661, section 2). + */ +static inline bool ppp_proto_is_valid(u16 proto) +{ + return !!((proto & 0x0101) == 0x0001); +} + #endif /* _PPP_DEFS_H_ */ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 0f9544a9bb9e..6c74812d64b2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -277,6 +277,18 @@ struct flow_dissector_key_num_of_vlans { u8 num_of_vlans; }; +/** + * struct flow_dissector_key_pppoe: + * @session_id: pppoe session id + * @ppp_proto: ppp protocol + * @type: pppoe eth type + */ +struct flow_dissector_key_pppoe { + __be16 session_id; + __be16 ppp_proto; + __be16 type; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -307,6 +319,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ + FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6aee04f75e3e..237d396b6e41 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -895,6 +895,11 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, return result == BPF_OK; } +static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr) +{ + return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL @@ -1214,26 +1219,60 @@ proto_again: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; + u16 ppp_proto; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } - nhoff += PPPOE_SES_HLEN; - switch (hdr->proto) { - case htons(PPP_IP): + if (!is_pppoe_ses_hdr_valid(hdr->hdr)) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + /* least significant bit of the most significant octet + * indicates if protocol field was compressed + */ + ppp_proto = ntohs(hdr->proto); + if (ppp_proto & 0x0100) { + ppp_proto = ppp_proto >> 8; + nhoff += PPPOE_SES_HLEN - 1; + } else { + nhoff += PPPOE_SES_HLEN; + } + + if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - case htons(PPP_IPV6): + } else if (ppp_proto == PPP_IPV6) { proto = htons(ETH_P_IPV6); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - default: + } else if (ppp_proto == PPP_MPLS_UC) { + proto = htons(ETH_P_MPLS_UC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto == PPP_MPLS_MC) { + proto = htons(ETH_P_MPLS_MC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto_is_valid(ppp_proto)) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + } else { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE)) { + struct flow_dissector_key_pppoe *key_pppoe; + + key_pppoe = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE, + target_container); + key_pppoe->session_id = hdr->hdr.sid; + key_pppoe->ppp_proto = htons(ppp_proto); + key_pppoe->type = htons(ETH_P_PPP_SES); + } break; } case htons(ETH_P_TIPC): { -- cgit v1.2.3 From 6a21b0856daaf9b7f63225f5b449ddee170c6f0a Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Mon, 18 Jul 2022 14:18:12 +0200 Subject: flow_offload: Introduce flow_match_pppoe Allow to offload PPPoE filters by adding flow_rule_match_pppoe. Drivers can extract PPPoE specific fields from now on. Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/net/flow_offload.h | 6 ++++++ net/core/flow_offload.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'net/core') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a8d8512b7059..2a9a9e42e7fd 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -76,6 +76,10 @@ struct flow_match_ct { struct flow_dissector_key_ct *key, *mask; }; +struct flow_match_pppoe { + struct flow_dissector_key_pppoe *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -122,6 +126,8 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); void flow_rule_match_ct(const struct flow_rule *rule, struct flow_match_ct *out); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0d3075d3c8fb..8cfb63528d18 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -230,6 +230,13 @@ void flow_rule_match_ct(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_ct); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out); +} +EXPORT_SYMBOL(flow_rule_match_pppoe); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) -- cgit v1.2.3 From 30bab7cdb56da4819ff081ad658646f2df16c098 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:14 +0200 Subject: net: devlink: make sure that devlink_try_get() works with valid pointer during xarray iteration Remove dependency on devlink_mutex during devlinks xarray iteration. The reason is that devlink_register/unregister() functions taking devlink_mutex would deadlock during devlink reload operation of devlink instance which registers/unregisters nested devlink instances. The devlinks xarray consistency is ensured internally by xarray. There is a reference taken when working with devlink using devlink_try_get(). But there is no guarantee that devlink pointer picked during xarray iteration is not freed before devlink_try_get() is called. Make sure that devlink_try_get() works with valid pointer. Achieve it by: 1) Splitting devlink_put() so the completion is sent only after grace period. Completion unblocks the devlink_unregister() routine, which is followed-up by devlink_free() 2) During devlinks xa_array iteration, get devlink pointer from xa_array holding RCU read lock and taking reference using devlink_try_get() before unlock. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 171 +++++++++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 91 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 98d79feeb3dc..c7abd928f389 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -70,6 +70,7 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct completion comp; + struct rcu_head rcu; char priv[] __aligned(NETDEV_ALIGN); }; @@ -221,8 +222,6 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); /* devlink_mutex * * An overall lock guarding every operation coming from userspace. - * It also guards devlink devices list and it is taken when - * driver registers/unregisters it. */ static DEFINE_MUTEX(devlink_mutex); @@ -232,10 +231,21 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); +static void __devlink_put_rcu(struct rcu_head *head) +{ + struct devlink *devlink = container_of(head, struct devlink, rcu); + + complete(&devlink->comp); +} + void devlink_put(struct devlink *devlink) { if (refcount_dec_and_test(&devlink->refcount)) - complete(&devlink->comp); + /* Make sure unregister operation that may await the completion + * is unblocked only after all users are after the end of + * RCU grace period. + */ + call_rcu(&devlink->rcu, __devlink_put_rcu); } struct devlink *__must_check devlink_try_get(struct devlink *devlink) @@ -278,12 +288,55 @@ void devl_unlock(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_unlock); +static struct devlink * +devlinks_xa_find_get(unsigned long *indexp, xa_mark_t filter, + void * (*xa_find_fn)(struct xarray *, unsigned long *, + unsigned long, xa_mark_t)) +{ + struct devlink *devlink; + + rcu_read_lock(); +retry: + devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + if (!devlink) + goto unlock; + /* For a possible retry, the xa_find_after() should be always used */ + xa_find_fn = xa_find_after; + if (!devlink_try_get(devlink)) + goto retry; +unlock: + rcu_read_unlock(); + return devlink; +} + +static struct devlink *devlinks_xa_find_get_first(unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(indexp, filter, xa_find); +} + +static struct devlink *devlinks_xa_find_get_next(unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(indexp, filter, xa_find_after); +} + +/* Iterate over devlink pointers which were possible to get reference to. + * devlink_put() needs to be called for each iterated devlink pointer + * in loop body in order to release the reference. + */ +#define devlinks_xa_for_each_get(index, devlink, filter) \ + for (index = 0, devlink = devlinks_xa_find_get_first(&index, filter); \ + devlink; devlink = devlinks_xa_find_get_next(&index, filter)) + +#define devlinks_xa_for_each_registered_get(index, devlink) \ + devlinks_xa_for_each_get(index, devlink, DEVLINK_REGISTERED) + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; unsigned long index; - bool found = false; char *busname; char *devname; @@ -293,21 +346,15 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); - lockdep_assert_held(&devlink_mutex); - - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + devlinks_xa_for_each_registered_get(index, devlink) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) { - found = true; - break; - } + net_eq(devlink_net(devlink), net)) + return devlink; + devlink_put(devlink); } - if (!found || !devlink_try_get(devlink)) - devlink = ERR_PTR(-ENODEV); - - return devlink; + return ERR_PTR(-ENODEV); } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -1329,10 +1376,7 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -1432,10 +1476,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { devlink_put(devlink); continue; @@ -1495,10 +1536,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -2177,10 +2215,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -2449,10 +2484,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -2601,10 +2633,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_pool_get) goto retry; @@ -2822,10 +2851,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_port_pool_get) goto retry; @@ -3071,10 +3097,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_tc_pool_bind_get) goto retry; @@ -5158,10 +5181,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -5393,10 +5413,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -5977,10 +5994,7 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -6511,10 +6525,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -7691,10 +7702,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry_rep; @@ -7721,10 +7729,7 @@ retry_rep: devlink_put(devlink); } - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry_port; @@ -8291,10 +8296,7 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -8518,10 +8520,7 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -8832,10 +8831,7 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) goto retry; @@ -9589,10 +9585,8 @@ void devlink_register(struct devlink *devlink) ASSERT_DEVLINK_NOT_REGISTERED(devlink); /* Make sure that we are in .probe() routine */ - mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); - mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_register); @@ -9609,10 +9603,8 @@ void devlink_unregister(struct devlink *devlink) devlink_put(devlink); wait_for_completion(&devlink->comp); - mutex_lock(&devlink_mutex); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -12281,10 +12273,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. */ mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - + devlinks_xa_for_each_registered_get(index, devlink) { if (!net_eq(devlink_net(devlink), net)) goto retry; -- cgit v1.2.3 From 294c4f57cfe3303ee2f050d1728c76a401e573a7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:15 +0200 Subject: net: devlink: move net check into devlinks_xa_for_each_registered_get() Benefit from having devlinks iterator helper devlinks_xa_for_each_registered_get() and move the net pointer check inside. Suggested-by: Jakub Kicinski Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 135 ++++++++++++++++------------------------------------- 1 file changed, 39 insertions(+), 96 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index c7abd928f389..865232a1455f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -289,7 +289,7 @@ void devl_unlock(struct devlink *devlink) EXPORT_SYMBOL_GPL(devl_unlock); static struct devlink * -devlinks_xa_find_get(unsigned long *indexp, xa_mark_t filter, +devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, void * (*xa_find_fn)(struct xarray *, unsigned long *, unsigned long, xa_mark_t)) { @@ -304,33 +304,40 @@ retry: xa_find_fn = xa_find_after; if (!devlink_try_get(devlink)) goto retry; + if (!net_eq(devlink_net(devlink), net)) { + devlink_put(devlink); + goto retry; + } unlock: rcu_read_unlock(); return devlink; } -static struct devlink *devlinks_xa_find_get_first(unsigned long *indexp, +static struct devlink *devlinks_xa_find_get_first(struct net *net, + unsigned long *indexp, xa_mark_t filter) { - return devlinks_xa_find_get(indexp, filter, xa_find); + return devlinks_xa_find_get(net, indexp, filter, xa_find); } -static struct devlink *devlinks_xa_find_get_next(unsigned long *indexp, +static struct devlink *devlinks_xa_find_get_next(struct net *net, + unsigned long *indexp, xa_mark_t filter) { - return devlinks_xa_find_get(indexp, filter, xa_find_after); + return devlinks_xa_find_get(net, indexp, filter, xa_find_after); } /* Iterate over devlink pointers which were possible to get reference to. * devlink_put() needs to be called for each iterated devlink pointer * in loop body in order to release the reference. */ -#define devlinks_xa_for_each_get(index, devlink, filter) \ - for (index = 0, devlink = devlinks_xa_find_get_first(&index, filter); \ - devlink; devlink = devlinks_xa_find_get_next(&index, filter)) +#define devlinks_xa_for_each_get(net, index, devlink, filter) \ + for (index = 0, \ + devlink = devlinks_xa_find_get_first(net, &index, filter); \ + devlink; devlink = devlinks_xa_find_get_next(net, &index, filter)) -#define devlinks_xa_for_each_registered_get(index, devlink) \ - devlinks_xa_for_each_get(index, devlink, DEVLINK_REGISTERED) +#define devlinks_xa_for_each_registered_get(net, index, devlink) \ + devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED) static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) @@ -346,10 +353,9 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); - devlinks_xa_for_each_registered_get(index, devlink) { + devlinks_xa_for_each_registered_get(net, index, devlink) { if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) + strcmp(dev_name(devlink->dev), devname) == 0) return devlink; devlink_put(devlink); } @@ -1376,10 +1382,7 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; @@ -1400,7 +1403,6 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -1476,12 +1478,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { - devlink_put(devlink); - continue; - } - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start) { idx++; devlink_put(devlink); @@ -1536,10 +1533,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { @@ -1559,7 +1553,6 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -2215,10 +2208,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->linecards_lock); list_for_each_entry(linecard, &devlink->linecard_list, list) { if (idx < start) { @@ -2241,7 +2231,6 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->linecards_lock); -retry: devlink_put(devlink); } out: @@ -2484,10 +2473,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { @@ -2507,7 +2493,6 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -2633,7 +2618,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_pool_get) goto retry; @@ -2851,9 +2836,8 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_port_pool_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_port_pool_get) goto retry; devl_lock(devlink); @@ -3097,9 +3081,8 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_tc_pool_bind_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_tc_pool_bind_get) goto retry; devl_lock(devlink); @@ -5181,10 +5164,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { @@ -5206,7 +5186,6 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -5413,10 +5392,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, @@ -5443,7 +5419,6 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, } } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -5994,13 +5969,9 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); -retry: devlink_put(devlink); if (err) goto out; @@ -6525,10 +6496,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int err = 0; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start || !devlink->ops->info_get) goto inc; @@ -6546,7 +6514,6 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, } inc: idx++; -retry: devlink_put(devlink); } mutex_unlock(&devlink_mutex); @@ -7702,10 +7669,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_rep; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7725,14 +7689,10 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->reporters_lock); -retry_rep: devlink_put(devlink); } - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_port; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); @@ -7757,7 +7717,6 @@ retry_rep: mutex_unlock(&port->reporters_lock); } devl_unlock(devlink); -retry_port: devlink_put(devlink); } out: @@ -8296,10 +8255,7 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { @@ -8319,7 +8275,6 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -8520,10 +8475,7 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(group_item, &devlink->trap_group_list, list) { @@ -8544,7 +8496,6 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -8831,10 +8782,7 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { @@ -8855,7 +8803,6 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, idx++; } devl_unlock(devlink); -retry: devlink_put(devlink); } out: @@ -12273,10 +12220,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. */ mutex_lock(&devlink_mutex); - devlinks_xa_for_each_registered_get(index, devlink) { - if (!net_eq(devlink_net(devlink), net)) - goto retry; - + devlinks_xa_for_each_registered_get(net, index, devlink) { WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, @@ -12284,7 +12228,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) &actions_performed, NULL); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); -retry: devlink_put(devlink); } mutex_unlock(&devlink_mutex); -- cgit v1.2.3 From 7b2d9a1a50ec3bedf067fe234a4a71196c89e826 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:16 +0200 Subject: net: devlink: introduce nested devlink entity for line card For the purpose of exposing device info and allow flash update which is going to be implemented in follow-up patches, introduce a possibility for a line card to expose relation to nested devlink entity. The nested devlink entity represents the line card. Example: $ devlink lc show pci/0000:01:00.0 lc 1 pci/0000:01:00.0: lc 1 state active type 16x100G nested_devlink auxiliary/mlxsw_core.lc.0 supported_types: 16x100G $ devlink dev show auxiliary/mlxsw_core.lc.0 auxiliary/mlxsw_core.lc.0 Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'net/core') diff --git a/include/net/devlink.h b/include/net/devlink.h index 780744b550b8..5bd3fac12e9e 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1580,6 +1580,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3d40a5d72ff..541321695f52 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -576,6 +576,8 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + DEVLINK_ATTR_NESTED_DEVLINK, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 865232a1455f..698b2d6e0ec7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -89,6 +89,7 @@ struct devlink_linecard { const char *type; struct devlink_linecard_type *types; unsigned int types_count; + struct devlink *nested_devlink; }; /** @@ -856,6 +857,24 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) +{ + struct nlattr *nested_attr; + + nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} + struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; @@ -2135,6 +2154,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } + if (linecard->nested_devlink && + devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -10255,6 +10278,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -10273,6 +10297,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -10320,6 +10345,23 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); +/** + * devlink_linecard_nested_dl_set - Attach/detach nested devlink + * instance to linecard. + * + * @linecard: devlink linecard + * @nested_devlink: devlink instance to attach or NULL to detach + */ +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) +{ + mutex_lock(&linecard->state_lock); + linecard->nested_devlink = nested_devlink; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); + int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, -- cgit v1.2.3 From 2bb88b2c4f7334bd91c734f3983492a133250edb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 27 Jul 2022 07:59:12 +0200 Subject: net: devlink: remove redundant net_eq() check from sb_pool_get_dumpit() The net_eq() check is already performed inside devlinks_xa_for_each_registered_get() helper, so remove the redundant appearance. Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20220727055912.568391-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 698b2d6e0ec7..ca4c9939d569 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2642,8 +2642,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_pool_get) + if (!devlink->ops->sb_pool_get) goto retry; devl_lock(devlink); -- cgit v1.2.3 From 08f588fa301bef264576fc915da6bf31b585a824 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Wed, 27 Jul 2022 22:27:20 +0530 Subject: devlink: introduce framework for selftests Add a framework for running selftests. Framework exposes devlink commands and test suite(s) to the user to execute and query the supported tests by the driver. Below are new entries in devlink_nl_ops devlink_nl_cmd_selftests_show_doit/dumpit: To query the supported selftests by the drivers. devlink_nl_cmd_selftests_run: To execute selftests. Users can provide a test mask for executing group tests or standalone tests. Documentation/networking/devlink/ path is already part of MAINTAINERS & the new files come under this path. Hence no update needed to the MAINTAINERS Signed-off-by: Vikas Gupta Reviewed-by: Andy Gospodarek Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-selftests.rst | 38 ++++ include/net/devlink.h | 21 ++ include/uapi/linux/devlink.h | 29 +++ net/core/devlink.c | 216 +++++++++++++++++++++ 4 files changed, 304 insertions(+) create mode 100644 Documentation/networking/devlink/devlink-selftests.rst (limited to 'net/core') diff --git a/Documentation/networking/devlink/devlink-selftests.rst b/Documentation/networking/devlink/devlink-selftests.rst new file mode 100644 index 000000000000..c0aa1f3aef0d --- /dev/null +++ b/Documentation/networking/devlink/devlink-selftests.rst @@ -0,0 +1,38 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +================= +Devlink Selftests +================= + +The ``devlink-selftests`` API allows executing selftests on the device. + +Tests Mask +========== +The ``devlink-selftests`` command should be run with a mask indicating +the tests to be executed. + +Tests Description +================= +The following is a list of tests that drivers may execute. + +.. list-table:: List of tests + :widths: 5 90 + + * - Name + - Description + * - ``DEVLINK_SELFTEST_FLASH`` + - Devices may have the firmware on non-volatile memory on the board, e.g. + flash. This particular test helps to run a flash selftest on the device. + Implementation of the test is left to the driver/firmware. + +example usage +------------- + +.. code:: shell + + # Query selftests supported on the devlink device + $ devlink dev selftests show DEV + # Query selftests supported on all devlink devices + $ devlink dev selftests show + # Executes selftests on the device + $ devlink dev selftests run DEV id flash diff --git a/include/net/devlink.h b/include/net/devlink.h index 5bd3fac12e9e..119ed1ffb988 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1509,6 +1509,27 @@ struct devlink_ops { struct devlink_rate *parent, void *priv_child, void *priv_parent, struct netlink_ext_ack *extack); + /** + * selftests_check() - queries if selftest is supported + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: true if test is supported by the driver + */ + bool (*selftest_check)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); + /** + * selftest_run() - Runs a selftest + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: status of the test + */ + enum devlink_selftest_status + (*selftest_run)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); }; void *devlink_priv(struct devlink *devlink); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 541321695f52..2f24b53a87a5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,6 +136,9 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, + DEVLINK_CMD_SELFTESTS_GET, /* can dump */ + DEVLINK_CMD_SELFTESTS_RUN, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -276,6 +279,30 @@ enum { #define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \ (_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1) +enum devlink_attr_selftest_id { + DEVLINK_ATTR_SELFTEST_ID_UNSPEC, + DEVLINK_ATTR_SELFTEST_ID_FLASH, /* flag */ + + __DEVLINK_ATTR_SELFTEST_ID_MAX, + DEVLINK_ATTR_SELFTEST_ID_MAX = __DEVLINK_ATTR_SELFTEST_ID_MAX - 1 +}; + +enum devlink_selftest_status { + DEVLINK_SELFTEST_STATUS_SKIP, + DEVLINK_SELFTEST_STATUS_PASS, + DEVLINK_SELFTEST_STATUS_FAIL +}; + +enum devlink_attr_selftest_result { + DEVLINK_ATTR_SELFTEST_RESULT_UNSPEC, + DEVLINK_ATTR_SELFTEST_RESULT, /* nested */ + DEVLINK_ATTR_SELFTEST_RESULT_ID, /* u32, enum devlink_attr_selftest_id */ + DEVLINK_ATTR_SELFTEST_RESULT_STATUS, /* u8, enum devlink_selftest_status */ + + __DEVLINK_ATTR_SELFTEST_RESULT_MAX, + DEVLINK_ATTR_SELFTEST_RESULT_MAX = __DEVLINK_ATTR_SELFTEST_RESULT_MAX - 1 +}; + /** * enum devlink_trap_action - Packet trap action. * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not @@ -578,6 +605,8 @@ enum devlink_attr { DEVLINK_ATTR_NESTED_DEVLINK, /* nested */ + DEVLINK_ATTR_SELFTESTS, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index ca4c9939d569..efeba223b9b8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -201,6 +201,10 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; +static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, +}; + static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 @@ -4826,6 +4830,206 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, return ret; } +static int +devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, + u32 portid, u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + struct nlattr *selftests; + void *hdr; + int err; + int i; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, + DEVLINK_CMD_SELFTESTS_GET); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto err_cancel_msg; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto err_cancel_msg; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + if (devlink->ops->selftest_check(devlink, i, extack)) { + err = nla_put_flag(msg, i); + if (err) + goto err_cancel_msg; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct sk_buff *msg; + int err; + + if (!devlink->ops->selftest_check) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, + info->snd_seq, 0, info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (idx < start || !devlink->ops->selftest_check) + goto inc; + + devl_lock(devlink); + err = devlink_nl_selftests_fill(msg, devlink, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + devl_unlock(devlink); + if (err) { + devlink_put(devlink); + break; + } +inc: + idx++; + devlink_put(devlink); + } + mutex_unlock(&devlink_mutex); + + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, + enum devlink_selftest_status test_status) +{ + struct nlattr *result_attr; + + result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); + if (!result_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || + nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, + test_status)) + goto nla_put_failure; + + nla_nest_end(skb, result_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, result_attr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_selftests_run(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *attrs, *selftests; + struct sk_buff *msg; + void *hdr; + int err; + int i; + + if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_SELFTESTS]) { + NL_SET_ERR_MSG_MOD(info->extack, "selftest required"); + return -EINVAL; + } + + attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; + + err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, + devlink_selftest_nl_policy, info->extack); + if (err < 0) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = -EMSGSIZE; + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); + if (!hdr) + goto free_msg; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto genlmsg_cancel; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + enum devlink_selftest_status test_status; + + if (nla_get_flag(tb[i])) { + if (!devlink->ops->selftest_check(devlink, i, + info->extack)) { + if (devlink_selftest_result_put(msg, i, + DEVLINK_SELFTEST_STATUS_SKIP)) + goto selftests_nest_cancel; + continue; + } + + test_status = devlink->ops->selftest_run(devlink, i, + info->extack); + if (devlink_selftest_result_put(msg, i, test_status)) + goto selftests_nest_cancel; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +selftests_nest_cancel: + nla_nest_cancel(msg, selftests); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return err; +} + static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, @@ -8969,6 +9173,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9328,6 +9533,17 @@ static const struct genl_small_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, }, + { + .cmd = DEVLINK_CMD_SELFTESTS_GET, + .doit = devlink_nl_cmd_selftests_get_doit, + .dumpit = devlink_nl_cmd_selftests_get_dumpit + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SELFTESTS_RUN, + .doit = devlink_nl_cmd_selftests_run, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { -- cgit v1.2.3 From 5502e8712c9be509a0b16e15ed64ef4e4bbc7e2c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 28 Jul 2022 18:53:42 +0300 Subject: net: devlink: remove region snapshot ID tracking dependency on devlink->lock After mlx4 driver is converted to do locked reload, functions to get/put regions snapshot ID may be called from both locked and unlocked context. So resolve this by removing dependency on devlink->lock for region snapshot ID tracking by using internal xa_lock() to maintain shapshot_ids xa_array consistency. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 64 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index efeba223b9b8..c8e20cafde1d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5894,21 +5894,28 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id) { unsigned long count; void *p; + int err; - devl_assert_locked(devlink); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); - if (WARN_ON(!p)) - return -EINVAL; + if (WARN_ON(!p)) { + err = -EINVAL; + goto unlock; + } - if (WARN_ON(!xa_is_value(p))) - return -EINVAL; + if (WARN_ON(!xa_is_value(p))) { + err = -EINVAL; + goto unlock; + } count = xa_to_value(p); count++; - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL)); + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC)); +unlock: + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5931,25 +5938,26 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) unsigned long count; void *p; - devl_assert_locked(devlink); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) - return; + goto unlock; if (WARN_ON(!xa_is_value(p))) - return; + goto unlock; count = xa_to_value(p); if (count > 1) { count--; - xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL); + __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC); } else { /* If this was the last user, we can erase this id */ - xa_erase(&devlink->snapshot_ids, id); + __xa_erase(&devlink->snapshot_ids, id); } +unlock: + xa_unlock(&devlink->snapshot_ids); } /** @@ -5970,13 +5978,17 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) */ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { - devl_assert_locked(devlink); + int err; - if (xa_load(&devlink->snapshot_ids, id)) + xa_lock(&devlink->snapshot_ids); + if (xa_load(&devlink->snapshot_ids, id)) { + xa_unlock(&devlink->snapshot_ids); return -EEXIST; - - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), - GFP_KERNEL)); + } + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), + GFP_ATOMIC)); + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5997,8 +6009,6 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) */ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - devl_assert_locked(devlink); - return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1), xa_limit_32b, GFP_KERNEL); } @@ -11442,13 +11452,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy); */ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - int err; - - devl_lock(devlink); - err = __devlink_region_snapshot_id_get(devlink, id); - devl_unlock(devlink); - - return err; + return __devlink_region_snapshot_id_get(devlink, id); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); @@ -11464,9 +11468,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); */ void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id) { - devl_lock(devlink); __devlink_snapshot_id_decrement(devlink, id); - devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); -- cgit v1.2.3 From 2dec18ad826f52658f7781ee995d236cc449b678 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 28 Jul 2022 18:53:43 +0300 Subject: net: devlink: remove region snapshots list dependency on devlink->lock After mlx4 driver is converted to do locked reload, devlink_region_snapshot_create() may be called from both locked and unlocked context. Note that in mlx4 region snapshots could be created on any command failure. That can happen in any flow that involves commands to FW, which means most of the driver flows. So resolve this by removing dependency on devlink->lock for region snapshots list consistency and introduce new mutex to ensure it. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index c8e20cafde1d..216f26b9943d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -699,6 +699,10 @@ struct devlink_region { const struct devlink_region_ops *ops; const struct devlink_port_region_ops *port_ops; }; + struct mutex snapshot_lock; /* protects snapshot_list, + * max_snapshots and cur_snapshots + * consistency. + */ struct list_head snapshot_list; u32 max_snapshots; u32 cur_snapshots; @@ -6021,7 +6025,7 @@ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) * Multiple snapshots can be created on a region. * The @snapshot_id should be obtained using the getter function. * - * Must be called only while holding the devlink instance lock. + * Must be called only while holding the region snapshot lock. * * @region: devlink region of the snapshot * @data: snapshot data @@ -6035,7 +6039,7 @@ __devlink_region_snapshot_create(struct devlink_region *region, struct devlink_snapshot *snapshot; int err; - devl_assert_locked(devlink); + lockdep_assert_held(®ion->snapshot_lock); /* check if region can hold one more snapshot */ if (region->cur_snapshots == region->max_snapshots) @@ -6073,7 +6077,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region, { struct devlink *devlink = region->devlink; - devl_assert_locked(devlink); + lockdep_assert_held(®ion->snapshot_lock); devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); region->cur_snapshots--; @@ -6252,11 +6256,15 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!region) return -EINVAL; + mutex_lock(®ion->snapshot_lock); snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) + if (!snapshot) { + mutex_unlock(®ion->snapshot_lock); return -EINVAL; + } devlink_region_snapshot_del(region, snapshot); + mutex_unlock(®ion->snapshot_lock); return 0; } @@ -6304,9 +6312,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } + mutex_lock(®ion->snapshot_lock); + if (region->cur_snapshots == region->max_snapshots) { NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots"); - return -ENOSPC; + err = -ENOSPC; + goto unlock; } snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; @@ -6315,17 +6326,18 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); - return -EEXIST; + err = -EEXIST; + goto unlock; } err = __devlink_snapshot_id_insert(devlink, snapshot_id); if (err) - return err; + goto unlock; } else { err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); if (err) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id"); - return err; + goto unlock; } } @@ -6363,16 +6375,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) goto err_notify; } + mutex_unlock(®ion->snapshot_lock); return 0; err_snapshot_create: region->ops->destructor(data); err_snapshot_capture: __devlink_snapshot_id_decrement(devlink, snapshot_id); + mutex_unlock(®ion->snapshot_lock); return err; err_notify: devlink_region_snapshot_del(region, snapshot); +unlock: + mutex_unlock(®ion->snapshot_lock); return err; } @@ -11310,6 +11326,7 @@ struct devlink_region *devl_region_create(struct devlink *devlink, region->ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); @@ -11383,6 +11400,7 @@ devlink_port_region_create(struct devlink_port *port, region->port_ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &port->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); @@ -11412,6 +11430,7 @@ void devl_region_destroy(struct devlink_region *region) devlink_region_snapshot_del(region, snapshot); list_del(®ion->list); + mutex_destroy(®ion->snapshot_lock); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); kfree(region); @@ -11487,13 +11506,11 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id) { - struct devlink *devlink = region->devlink; int err; - devl_lock(devlink); + mutex_lock(®ion->snapshot_lock); err = __devlink_region_snapshot_create(region, data, snapshot_id); - devl_unlock(devlink); - + mutex_unlock(®ion->snapshot_lock); return err; } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); -- cgit v1.2.3 From c90005b5f75ce3ef6170281ac4d77e8b7123995b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 28 Jul 2022 18:53:50 +0300 Subject: devlink: Hold the instance lock in health callbacks Let the core take the devlink instance lock around health callbacks and remove the now redundant locking in the drivers. Signed-off-by: Moshe Shemesh Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 8 +------ net/core/devlink.c | 30 +++++++++++------------- 2 files changed, 15 insertions(+), 23 deletions(-) (limited to 'net/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 6e154b5c2bc6..2cf2c9948446 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -622,14 +622,8 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); - struct devlink *devlink = priv_to_devlink(dev); - int ret; - devl_lock(devlink); - ret = mlx5_health_try_recover(dev); - devl_unlock(devlink); - - return ret; + return mlx5_health_try_recover(dev); } static int diff --git a/net/core/devlink.c b/net/core/devlink.c index 216f26b9943d..c43c96554a3e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7757,6 +7757,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = reporter->devlink; unsigned long recover_ts_threshold; + int ret; /* write a log message of the current error */ WARN_ON(!msg); @@ -7790,11 +7791,14 @@ int devlink_health_report(struct devlink_health_reporter *reporter, mutex_unlock(&reporter->dump_lock); } - if (reporter->auto_recover) - return devlink_health_reporter_recover(reporter, - priv_ctx, NULL); + if (!reporter->auto_recover) + return 0; - return 0; + devl_lock(devlink); + ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL); + devl_unlock(devlink); + + return ret; } EXPORT_SYMBOL_GPL(devlink_health_report); @@ -9469,8 +9473,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, /* can be retrieved by unprivileged users */ }, { @@ -9478,24 +9481,21 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, @@ -9509,16 +9509,14 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_test_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_FLASH_UPDATE, -- cgit v1.2.3 From c2368b19807affd7621f7c4638cd2e17fec13021 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 29 Jul 2022 09:10:35 +0200 Subject: net: devlink: introduce "unregistering" mark and use it during devlinks iteration Add new mark called "unregistering" to be set at the beginning of devlink_unregister() function. Check this mark during devlinks iteration in order to prevent getting a reference of devlink which is being currently unregistered. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index c43c96554a3e..6b20196ada1a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -207,6 +207,7 @@ static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 +#define DEVLINK_UNREGISTERING XA_MARK_2 /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain @@ -305,6 +306,14 @@ retry: devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); if (!devlink) goto unlock; + + /* In case devlink_unregister() was already called and "unregistering" + * mark was set, do not allow to get a devlink reference here. + * This prevents live-lock of devlink_unregister() wait for completion. + */ + if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING)) + goto retry; + /* For a possible retry, the xa_find_after() should be always used */ xa_find_fn = xa_find_after; if (!devlink_try_get(devlink)) @@ -9809,11 +9818,13 @@ void devlink_unregister(struct devlink *devlink) ASSERT_DEVLINK_REGISTERED(devlink); /* Make sure that we are in .remove() routine */ + xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); devlink_put(devlink); wait_for_completion(&devlink->comp); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); } EXPORT_SYMBOL_GPL(devlink_unregister); -- cgit v1.2.3 From 644a66c60f02f302d82c3008ae2ffe67cf495383 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 29 Jul 2022 09:10:36 +0200 Subject: net: devlink: convert reload command to take implicit devlink->lock Convert reload command to behave the same way as the rest of the commands and let if be called with devlink->lock held. Remove the temporary devl_lock taking from drivers. As the DEVLINK_NL_FLAG_NO_LOCK flag is no longer used, remove it alongside. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ---- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 4 ---- drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ---- drivers/net/netdevsim/dev.c | 6 ------ net/core/devlink.c | 18 +++++------------- 5 files changed, 5 insertions(+), 31 deletions(-) (limited to 'net/core') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2c764d1d897d..78c5f40382c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3958,11 +3958,9 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); return -EOPNOTSUPP; } - devl_lock(devlink); if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); mlx4_restart_one_down(persist->pdev); - devl_unlock(devlink); return 0; } @@ -3975,10 +3973,8 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a struct mlx4_dev_persistent *persist = dev->persist; int err; - devl_lock(devlink); *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); err = mlx4_restart_one_up(persist->pdev, true, devlink); - devl_unlock(devlink); if (err) mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 1c05a7091698..66c6a7017695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -164,7 +164,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); } - devl_lock(devlink); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: mlx5_unload_one_devl_locked(dev); @@ -181,7 +180,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, ret = -EOPNOTSUPP; } - devl_unlock(devlink); return ret; } @@ -192,7 +190,6 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a struct mlx5_core_dev *dev = devlink_priv(devlink); int ret = 0; - devl_lock(devlink); *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: @@ -211,7 +208,6 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a ret = -EOPNOTSUPP; } - devl_unlock(devlink); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 7c93bd04a3a1..75553eb2c7f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1499,9 +1499,7 @@ mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET)) return -EOPNOTSUPP; - devl_lock(devlink); mlxsw_core_bus_device_unregister(mlxsw_core, true); - devl_unlock(devlink); return 0; } @@ -1515,12 +1513,10 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_re *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); - devl_lock(devlink); err = mlxsw_core_bus_device_register(mlxsw_core->bus_info, mlxsw_core->bus, mlxsw_core->bus_priv, true, devlink, extack); - devl_unlock(devlink); return err; } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 5802e80e8fe1..e88f783c297e 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -948,18 +948,15 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, { struct nsim_dev *nsim_dev = devlink_priv(devlink); - devl_lock(devlink); if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); - devl_unlock(devlink); return -EOPNOTSUPP; } nsim_dev_reload_destroy(nsim_dev); - devl_unlock(devlink); return 0; } @@ -970,19 +967,16 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio struct nsim_dev *nsim_dev = devlink_priv(devlink); int ret; - devl_lock(devlink); if (nsim_dev->fail_reload) { /* For testing purposes, user set debugfs fail_reload * value to true. Fail right away. */ NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); - devl_unlock(devlink); return -EINVAL; } *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); ret = nsim_dev_reload_create(nsim_dev, extack); - devl_unlock(devlink); return ret; } diff --git a/net/core/devlink.c b/net/core/devlink.c index 6b20196ada1a..57865b231364 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -768,12 +768,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) #define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) -/* The per devlink instance lock is taken by default in the pre-doit - * operation, yet several commands do not require this. The global - * devlink lock is taken and protects from disruption by user-calls. - */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(5) - static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { @@ -788,8 +782,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); } - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - devl_lock(devlink); + devl_lock(devlink); info->user_ptr[0] = devlink; if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { devlink_port = devlink_port_get_from_info(devlink, info); @@ -831,8 +824,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, return 0; unlock: - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - devl_unlock(devlink); + devl_unlock(devlink); devlink_put(devlink); mutex_unlock(&devlink_mutex); return err; @@ -849,8 +841,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, linecard = info->user_ptr[1]; devlink_linecard_put(linecard); } - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - devl_unlock(devlink); + devl_unlock(devlink); devlink_put(devlink); mutex_unlock(&devlink_mutex); } @@ -9414,7 +9405,6 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PARAM_GET, @@ -12507,10 +12497,12 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(net, index, devlink) { WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); + mutex_lock(&devlink->lock); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, &actions_performed, NULL); + mutex_unlock(&devlink->lock); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); devlink_put(devlink); -- cgit v1.2.3 From d3efc2a6a6d851bcd53805309f4632e018007436 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 29 Jul 2022 09:10:37 +0200 Subject: net: devlink: remove devlink_mutex All accesses to devlink structure from userspace and drivers are locked with devlink->lock instance mutex. Also, devlinks xa_array iteration is taken care of by iteration helpers taking devlink reference. Therefore, remove devlink_mutex as it is no longer needed. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 80 +++--------------------------------------------------- 1 file changed, 4 insertions(+), 76 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 57865b231364..06cd7c1a1f0a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -225,12 +225,6 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define ASSERT_DEVLINK_NOT_REGISTERED(d) \ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) -/* devlink_mutex - * - * An overall lock guarding every operation coming from userspace. - */ -static DEFINE_MUTEX(devlink_mutex); - struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); @@ -776,12 +770,9 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, struct devlink *devlink; int err; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(devlink)) { - mutex_unlock(&devlink_mutex); + if (IS_ERR(devlink)) return PTR_ERR(devlink); - } devl_lock(devlink); info->user_ptr[0] = devlink; if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { @@ -826,7 +817,6 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); return err; } @@ -843,7 +833,6 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, } devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); } static struct genl_family devlink_nl_family; @@ -1408,7 +1397,6 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { @@ -1433,7 +1421,6 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); if (err != -EMSGSIZE) return err; @@ -1504,7 +1491,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start) { idx++; @@ -1521,8 +1507,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, idx++; } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -1559,7 +1543,6 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { @@ -1583,8 +1566,6 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -2238,7 +2219,6 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->linecards_lock); list_for_each_entry(linecard, &devlink->linecard_list, list) { @@ -2265,8 +2245,6 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -2503,7 +2481,6 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -2527,8 +2504,6 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -2648,7 +2623,6 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (!devlink->ops->sb_pool_get) goto retry; @@ -2672,8 +2646,6 @@ retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -2865,7 +2837,6 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (!devlink->ops->sb_port_pool_get) goto retry; @@ -2889,8 +2860,6 @@ retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -3110,7 +3079,6 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (!devlink->ops->sb_tc_pool_bind_get) goto retry; @@ -3135,8 +3103,6 @@ retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -4908,7 +4874,6 @@ static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start || !devlink->ops->selftest_check) goto inc; @@ -4927,7 +4892,6 @@ inc: idx++; devlink_put(devlink); } - mutex_unlock(&devlink_mutex); if (err != -EMSGSIZE) return err; @@ -5393,7 +5357,6 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(param_item, &devlink->param_list, list) { @@ -5419,8 +5382,6 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -5621,7 +5582,6 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { @@ -5652,8 +5612,6 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -6208,7 +6166,6 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); @@ -6217,7 +6174,6 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, goto out; } out: - mutex_unlock(&devlink_mutex); cb->args[0] = idx; return msg->len; } @@ -6483,12 +6439,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, start_offset = *((u64 *)&cb->args[0]); - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) { - err = PTR_ERR(devlink); - goto out_dev; - } + if (IS_ERR(devlink)) + return PTR_ERR(devlink); devl_lock(devlink); @@ -6588,8 +6541,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); - return skb->len; nla_put_failure: @@ -6597,8 +6548,6 @@ nla_put_failure: out_unlock: devl_unlock(devlink); devlink_put(devlink); -out_dev: - mutex_unlock(&devlink_mutex); return err; } @@ -6747,7 +6696,6 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start || !devlink->ops->info_get) goto inc; @@ -6768,7 +6716,6 @@ inc: idx++; devlink_put(devlink); } - mutex_unlock(&devlink_mutex); if (err != -EMSGSIZE) return err; @@ -7847,18 +7794,13 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) - goto unlock; + return NULL; reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); devlink_put(devlink); - mutex_unlock(&devlink_mutex); return reporter; -unlock: - mutex_unlock(&devlink_mutex); - return NULL; } void @@ -7924,7 +7866,6 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, @@ -7976,8 +7917,6 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -8510,7 +8449,6 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(trap_item, &devlink->trap_list, list) { @@ -8534,8 +8472,6 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -8730,7 +8666,6 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(group_item, &devlink->trap_group_list, @@ -8755,8 +8690,6 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -9037,7 +8970,6 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); list_for_each_entry(policer_item, &devlink->trap_policer_list, @@ -9062,8 +8994,6 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -12494,7 +12424,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) /* In case network namespace is getting destroyed, reload * all devlink instances from this namespace into init_net. */ - mutex_lock(&devlink_mutex); devlinks_xa_for_each_registered_get(net, index, devlink) { WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); mutex_lock(&devlink->lock); @@ -12507,7 +12436,6 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) pr_warn("Failed to reload devlink instance into init_net\n"); devlink_put(devlink); } - mutex_unlock(&devlink_mutex); } static struct pernet_operations devlink_pernet_ops __net_initdata = { -- cgit v1.2.3 From 09b278462f16569c63dd78ffa29bbfe048b4e604 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 29 Jul 2022 09:10:38 +0200 Subject: net: devlink: enable parallel ops on netlink interface As the devlink_mutex was removed and all devlink instances are protected individually by devlink->lock mutex, allow the netlink ops to run in parallel and therefore allow user to execute commands on multiple devlink instances simultaneously. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 06cd7c1a1f0a..889e7e3d3e8a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9505,6 +9505,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .maxattr = DEVLINK_ATTR_MAX, .policy = devlink_nl_policy, .netnsok = true, + .parallel_ops = true, .pre_doit = devlink_nl_pre_doit, .post_doit = devlink_nl_post_doit, .module = THIS_MODULE, -- cgit v1.2.3 From 80ef928643c1558a0474389fcd680a5ccd6c86e6 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Mon, 1 Aug 2022 18:59:56 +0700 Subject: net: devlink: Fix missing mutex_unlock() call Commit 2dec18ad826f forgets to call mutex_unlock() before the function returns in the error path: New smatch warnings: net/core/devlink.c:6392 devlink_nl_cmd_region_new() warn: inconsistent \ returns '®ion->snapshot_lock'. Make sure we call mutex_unlock() in this error path. Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: 2dec18ad826f ("net: devlink: remove region snapshots list dependency on devlink->lock") Signed-off-by: Ammar Faizi Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20220801115742.1309329-1-ammar.faizi@intel.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 889e7e3d3e8a..5da5c7cca98a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6315,8 +6315,10 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (WARN_ON(!snapshot)) - return -EINVAL; + if (WARN_ON(!snapshot)) { + err = -EINVAL; + goto unlock; + } msg = devlink_nl_region_notify_build(region, snapshot, DEVLINK_CMD_REGION_NEW, -- cgit v1.2.3