From eb02d39ad3099c3dcd96c5b3fdc0303541766ed1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:31:16 -0700 Subject: netdevice.h: fix proto_down_reason kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'proto_down_reason' not described in 'net_device' Fixes: 829eb208e80d ("rtnetlink: add support for protodown reason") Signed-off-by: Randy Dunlap Acked-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0e303f6603f..bf0e313486be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,6 +1784,7 @@ enum netdev_priv_flags { * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * + * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one -- cgit v1.2.3 From ffa59b0b396cb2c0ea6712ff30ee05c35d4c9c8d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:32:30 -0700 Subject: netdevice.h: fix xdp_state kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'xdp_state' not described in 'net_device' Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Randy Dunlap Cc: Andrii Nakryiko Cc: Alexei Starovoitov Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf0e313486be..7bd4fcdd0738 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1849,6 +1849,7 @@ enum netdev_priv_flags { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @xdp_state: stores info on attached XDP BPF programs * * FIXME: cleanup struct net_device such that network protocol info * moves out. -- cgit v1.2.3 From 67cc570edaa02016a8685a06a0ee91f05a6277d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Aug 2020 19:28:42 +0200 Subject: netfilter: nf_tables: coalesce multiple notifications into one skbuff On x86_64, each notification results in one skbuff allocation which consumes at least 768 bytes due to the skbuff overhead. This patch coalesces several notifications into one single skbuff, so each notification consumes at least ~211 bytes, that ~3.5 times less memory consumption. As a result, this is reducing the chances to exhaust the netlink socket receive buffer. Rule of thumb is that each notification batch only contains netlink messages whose report flag is the same, nfnetlink_send() requires this to do appropriate delivery to userspace, either via unicast (echo mode) or multicast (monitor mode). The skbuff control buffer is used to annotate the report flag for later handling at the new coalescing routine. The batch skbuff notification size is NLMSG_GOODSIZE, using a larger skbuff would allow for more socket receiver buffer savings (to amortize the cost of the skbuff even more), however, going over that size might break userspace applications, so let's be conservative and stick to NLMSG_GOODSIZE. Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 70 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a1a8d45adb42..6c0806bd8d1e 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -8,6 +8,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; struct list_head module_list; + struct list_head notify_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7dc1cbf40ea..4603b667973a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,6 +684,18 @@ nla_put_failure: return -1; } +struct nftnl_skb_parms { + bool report; +}; +#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) + +static void nft_notify_enqueue(struct sk_buff *skb, bool report, + struct list_head *notify_list) +{ + NFT_CB(skb).report = report; + list_add_tail(&skb->list, notify_list); +} + static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; @@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, - gfp_flags); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nft_notify_enqueue(skb, report, &net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) mutex_unlock(&net->nft.commit_mutex); } +static void nft_commit_notify(struct net *net, u32 portid) +{ + struct sk_buff *batch_skb = NULL, *nskb, *skb; + unsigned char *data; + int len; + + list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + if (!batch_skb) { +new_batch: + batch_skb = skb; + len = NLMSG_GOODSIZE - skb->len; + list_del(&skb->list); + continue; + } + len -= skb->len; + if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { + data = skb_put(batch_skb, skb->len); + memcpy(data, skb->data, skb->len); + list_del(&skb->list); + kfree_skb(skb); + continue; + } + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + goto new_batch; + } + + if (batch_skb) { + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + } + + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_release(net); @@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.module_list); + INIT_LIST_HEAD(&net->nft.notify_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); } static struct pernet_operations nf_tables_net_ops = { -- cgit v1.2.3 From 4a009cb04aeca0de60b73f37b102573354214b52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Sep 2020 01:27:40 -0700 Subject: net: add __must_check to skb_put_padto() skb_put_padto() and __skb_put_padto() callers must check return values or risk use-after-free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ed9bea924dc3..04a18e01b362 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3223,8 +3223,9 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, - bool free_on_error) +static inline int __must_check __skb_put_padto(struct sk_buff *skb, + unsigned int len, + bool free_on_error) { unsigned int size = skb->len; @@ -3247,7 +3248,7 @@ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } -- cgit v1.2.3 From 2d2fe8433796603091ac8ea235b9165ac5a85f9a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:08 +0300 Subject: net: qed: Disable aRFS for NPAR and 100G In CMT and NPAR the PF is unknown when the GFS block processes the packet. Therefore cannot use searcher as it has a per PF database, and thus ARFS must be disabled. Fixes: d51e4af5c209 ("qed: aRFS infrastructure support") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 11 ++++++++++- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ include/linux/qed/qed_if.h | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b8f076e4e6b8..3db181f3617a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -4253,7 +4253,8 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | BIT(QED_MF_LLH_PROTO_CLSS) | BIT(QED_MF_LL2_NON_UNICAST) | - BIT(QED_MF_INTER_PF_SWITCH); + BIT(QED_MF_INTER_PF_SWITCH) | + BIT(QED_MF_DISABLE_ARFS); break; case NVM_CFG1_GLOB_MF_MODE_DEFAULT: cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | @@ -4266,6 +4267,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", cdev->mf_bits); + + /* In CMT the PF is unknown when the GFS block processes the + * packet. Therefore cannot use searcher as it has a per PF + * database, and thus ARFS must be disabled. + * + */ + if (QED_IS_CMT(cdev)) + cdev->mf_bits |= BIT(QED_MF_DISABLE_ARFS); } DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4c6ac8862744..07824bf9d68d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1980,6 +1980,9 @@ void qed_arfs_mode_configure(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_arfs_config_params *p_cfg_params) { + if (test_bit(QED_MF_DISABLE_ARFS, &p_hwfn->cdev->mf_bits)) + return; + if (p_cfg_params->mode != QED_FILTER_CONFIG_MODE_DISABLE) { qed_gft_config(p_hwfn, p_ptt, p_hwfn->rel_pf_id, p_cfg_params->tcp, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f39f629242a1..50e5eb22e60a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -444,6 +444,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->fw_eng = FW_ENGINEERING_VERSION; dev_info->b_inter_pf_switch = test_bit(QED_MF_INTER_PF_SWITCH, &cdev->mf_bits); + if (!test_bit(QED_MF_DISABLE_ARFS, &cdev->mf_bits)) + dev_info->b_arfs_capable = true; dev_info->tx_switching = true; if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index cd6a5c7e56eb..cdd73afc4c46 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -623,6 +623,7 @@ struct qed_dev_info { #define QED_MFW_VERSION_3_OFFSET 24 u32 flash_size; + bool b_arfs_capable; bool b_inter_pf_switch; bool tx_switching; bool rdma_supported; -- cgit v1.2.3 From 83896b0bd8223ac33bcc609bcc82a57a587002ff Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 10 Sep 2020 04:46:40 -0400 Subject: net: Fix broken NETIF_F_CSUM_MASK spell in netdev_features.h Remove the weird space inside the NETIF_F_CSUM_MASK. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2cc3cf80b49a..0b17c4322b09 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -193,7 +193,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be +/* List of IP checksum features. Note that NETIF_F_HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory */ -- cgit v1.2.3 From 553d87b658fed0e22a0f86b4f1b093c39d3e3074 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 10 Sep 2020 15:34:39 +0200 Subject: netlink: fix doc about nlmsg_parse/nla_validate There is no @validate argument. CC: Johannes Berg Fixes: 3de644035446 ("netlink: re-add parse/validate functions in strict mode") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/netlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index c0411f14fb53..8e0eb2c9c528 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -726,7 +726,6 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected - * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() @@ -824,7 +823,6 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy - * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the -- cgit v1.2.3 From 1869e226a7b3ef75b4f70ede2f1b7229f7157fa4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 13 Sep 2020 12:43:39 -0600 Subject: ipv4: Initialize flowi4_multipath_hash in data path flowi4_multipath_hash was added by the commit referenced below for tunnels. Unfortunately, the patch did not initialize the new field for several fast path lookups that do not initialize the entire flow struct to 0. Fix those locations. Currently, flowi4_multipath_hash is random garbage and affects the hash value computed by fib_multipath_hash for multipath selection. Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") Signed-off-by: David Ahern Cc: wenxu Signed-off-by: David S. Miller --- include/net/flow.h | 1 + net/core/filter.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 4 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 929d3ca614d0..b2531df3f65f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -116,6 +116,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_multipath_hash = 0; } /* Reset some input parameters after previous lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index 1f647ab986b6..1b168371ba96 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4838,6 +4838,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; + fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 41079490a118..86a23e4a6a50 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -362,6 +362,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca6bcab7b03..e5f210d00851 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2147,6 +2147,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; -- cgit v1.2.3 From 13e6ce98aa65ab5ce19351c020419360dfe8af29 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:51:50 +0800 Subject: net: sched: only keep the available bits when setting vxlan md->gbp As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can be set to or parse from the packet for vxlan gbp option. So we'd better do the mask when set it in act_tunnel_key and cls_flower. Otherwise, when users don't know these bits, they may configure with a value which can never be matched. Reported-by: Shuang Li Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/vxlan.h | 3 +++ net/sched/act_tunnel_key.c | 1 + net/sched/cls_flower.c | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3a41627cbdfe..08537aa14f7c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -121,6 +121,9 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +#define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \ + VXLAN_GBP_ID_MASK) + /* * VXLAN Generic Protocol Extension (VXLAN_F_GPE): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 536c4bc31be6..37f1e10f35e0 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -156,6 +156,7 @@ tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, struct vxlan_metadata *md = dst; md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; } return sizeof(struct vxlan_metadata); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a4f7ef1de7e7..e8fda1bd4d9d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1175,8 +1175,10 @@ static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; + } return sizeof(*md); } -- cgit v1.2.3 From 19a83d36f9837e8bd27435ebb31564a717a5d15a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 17 Sep 2020 01:04:10 +0200 Subject: ethtool: add and use message type for tunnel info reply Tunnel offload info code uses ETHTOOL_MSG_TUNNEL_INFO_GET message type (cmd field in genetlink header) for replies to tunnel info netlink request, i.e. the same value as the request have. This is a problem because we are using two separate enums for userspace to kernel and kernel to userspace message types so that this ETHTOOL_MSG_TUNNEL_INFO_GET (28) collides with ETHTOOL_MSG_CABLE_TEST_TDR_NTF which is what message type 28 means for kernel to userspace messages. As the tunnel info request reached mainline in 5.9 merge window, we should still be able to fix the reply message type without breaking backward compatibility. Fixes: c7d759eb7b12 ("ethtool: add tunnel info interface") Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 3 +++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/tunnels.c | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d53bcb31645a..b5a79881551f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -206,6 +206,7 @@ Userspace to kernel: ``ETHTOOL_MSG_TSINFO_GET`` get timestamping info ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test + ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info ===================================== ================================ Kernel to userspace: @@ -239,6 +240,7 @@ Kernel to userspace: ``ETHTOOL_MSG_TSINFO_GET_REPLY`` timestamping info ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results + ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1363,4 +1365,5 @@ are netlink only. ``ETHTOOL_SFECPARAM`` n/a n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' + n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5dcd24cb33ea..72ba36be9655 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -79,6 +79,7 @@ enum { ETHTOOL_MSG_TSINFO_GET_REPLY, ETHTOOL_MSG_CABLE_TEST_NTF, ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 84f23289475b..d93bf2da0f34 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -200,7 +200,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.dev, - ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, ETHTOOL_A_TUNNEL_INFO_HEADER, info, &reply_payload); if (!rskb) { @@ -273,7 +273,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) goto cont; ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_TUNNEL_INFO_GET); + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); if (!ehdr) { ret = -EMSGSIZE; goto out; -- cgit v1.2.3 From 6565243c0677aa2befa5a953cf11bc7b4a6f0a47 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:24 +0300 Subject: net: mscc: ocelot: add locking for the port TX timestamp ID The ocelot_port->ts_id is used to: (a) populate skb->cb[0] for matching the TX timestamp in the PTP IRQ with an skb. (b) populate the REW_OP from the injection header of the ongoing skb. Only then is ocelot_port->ts_id incremented. This is a problem because, at least theoretically, another timestampable skb might use the same ocelot_port->ts_id before that is incremented. Normally all transmit calls are serialized by the netdev transmit spinlock, but in this case, ocelot_port_add_txtstamp_skb() is also called by DSA, which has started declaring the NETIF_F_LLTX feature since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for slave ports"). So the logic of using and incrementing the timestamp id should be atomic per port. The solution is to use the global ocelot_port->ts_id only while protected by the associated ocelot_port->ts_id_lock. That's where we populate skb->cb[0]. Note that for ocelot, ocelot_port_add_txtstamp_skb is called for the actual skb, but for felix, it is called for the skb's clone. That is something which will also be changed in the future. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 8 +++++++- drivers/net/ethernet/mscc/ocelot_net.c | 6 ++---- include/soc/mscc/ocelot.h | 1 + net/dsa/tag_ocelot.c | 11 +++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5abb7d2b0a9e..83eb7c325061 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -421,10 +421,15 @@ int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port, if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + spin_lock(&ocelot_port->ts_id_lock); + shinfo->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in cb[0] of sk_buff */ - skb->cb[0] = ocelot_port->ts_id % 4; + skb->cb[0] = ocelot_port->ts_id; + ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; skb_queue_tail(&ocelot_port->tx_skbs, skb); + + spin_unlock(&ocelot_port->ts_id_lock); return 0; } return -ENODATA; @@ -1300,6 +1305,7 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); + spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index cacabc23215a..8490e42e9e2d 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -349,10 +349,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - info.rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + info.rew_op |= skb->cb[0] << 3; } ocelot_gen_ifh(ifh, &info); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index da369b12005f..4521dd602ddc 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -566,6 +566,7 @@ struct ocelot_port { u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; + spinlock_t ts_id_lock; phy_interface_t phy_mode; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 42f327c06dca..b4fc05cafaa6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct sk_buff *clone = DSA_SKB_CB(skb)->clone; + rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + /* Retrieve timestamp ID populated inside skb->cb[0] of the + * clone by ocelot_port_add_txtstamp_skb + */ + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + rew_op |= clone->cb[0] << 3; packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); } -- cgit v1.2.3 From e5fb512d81d021b7c7a0c2547c3dafb9de759285 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:30 +0300 Subject: net: mscc: ocelot: deinitialize only initialized ports Currently mscc_ocelot_init_ports() will skip initializing a port when it doesn't have a phy-handle, so the ocelot->ports[port] pointer will be NULL. Take this into consideration when tearing down the driver, and add a new function ocelot_deinit_port() to the switch library, mirror of ocelot_init_port(), which needs to be called by the driver for all ports it has initialized. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 3 +++ drivers/net/ethernet/mscc/ocelot.c | 16 ++++++++-------- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 2 ++ include/soc/mscc/ocelot.h | 1 + 4 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 66da447c4096..01427cd08448 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -643,10 +643,13 @@ static void felix_teardown(struct dsa_switch *ds) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); + int port; if (felix->info->mdio_bus_free) felix->info->mdio_bus_free(ocelot); + for (port = 0; port < ocelot->num_phys_ports; port++) + ocelot_deinit_port(ocelot, port); ocelot_deinit_timestamp(ocelot); /* stop workqueue thread */ ocelot_deinit(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 83eb7c325061..8518e1d60da4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1550,18 +1550,18 @@ EXPORT_SYMBOL(ocelot_init); void ocelot_deinit(struct ocelot *ocelot) { - struct ocelot_port *port; - int i; - cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); mutex_destroy(&ocelot->stats_lock); - - for (i = 0; i < ocelot->num_phys_ports; i++) { - port = ocelot->ports[i]; - skb_queue_purge(&port->tx_skbs); - } } EXPORT_SYMBOL(ocelot_deinit); +void ocelot_deinit_port(struct ocelot *ocelot, int port) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + skb_queue_purge(&ocelot_port->tx_skbs); +} +EXPORT_SYMBOL(ocelot_deinit_port); + MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 252c49b5f22b..e02fb8bfab63 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -908,6 +908,8 @@ static void mscc_ocelot_release_ports(struct ocelot *ocelot) if (!ocelot_port) continue; + ocelot_deinit_port(ocelot, port); + priv = container_of(ocelot_port, struct ocelot_port_private, port); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4521dd602ddc..0ac4e7fba086 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -678,6 +678,7 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, int ocelot_init(struct ocelot *ocelot); void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); +void ocelot_deinit_port(struct ocelot *ocelot, int port); /* DSA callbacks */ void ocelot_port_enable(struct ocelot *ocelot, int port, -- cgit v1.2.3 From fe81d9f6182d1160e625894eecb3d7ff0222cac5 Mon Sep 17 00:00:00 2001 From: Henry Ptasinski Date: Sat, 19 Sep 2020 00:12:11 +0000 Subject: net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant When calculating ancestor_size with IPv6 enabled, simply using sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for alignment in the struct sctp6_sock. On x86, there aren't any extra bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte boundary so there were 4 pad bytes that were omitted from the ancestor_size calculation. This would lead to corruption of the pd_lobby pointers, causing an oops when trying to free the sctp structure on socket close. Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant") Signed-off-by: Henry Ptasinski Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 8 +++++--- net/sctp/socket.c | 9 +++------ 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b33f1aefad09..0bdff38eb4bb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -226,12 +226,14 @@ struct sctp_sock { data_ready_signalled:1; atomic_t pd_mode; + + /* Fields after this point will be skipped on copies, like on accept + * and peeloff operations + */ + /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; - /* These must be the last fields, as they will skipped on copies, - * like on accept and peeloff operations - */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 836615f71a7d..53d0a4161df3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9220,13 +9220,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { - int ancestor_size = sizeof(struct inet_sock) + - sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, pd_lobby); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += sizeof(struct ipv6_pinfo); + size_t ancestor_size = sizeof(struct inet_sock); + ancestor_size += sk_from->sk_prot->obj_size; + ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } -- cgit v1.2.3