From b428336676dbca363262cc134b6218205df4f530 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Tue, 4 Aug 2020 17:44:09 -0400 Subject: netfilter: nf_tables: nft_exthdr: the presence return value should be little-endian On big-endian machine, the returned register data when the exthdr is present is not being compared correctly because little-endian is assumed. The function nft_cmp_fast_mask(), called by nft_cmp_fast_eval() and nft_cmp_fast_init(), calls cpu_to_le32(). The following dump also shows that little endian is assumed: $ nft --debug=netlink add rule ip recordroute forward ip option rr exists counter ip [ exthdr load ipv4 1b @ 7 + 0 present => reg 1 ] [ cmp eq reg 1 0x01000000 ] [ counter pkts 0 bytes 0 ] Lastly, debug print in nft_cmp_fast_init() and nft_cmp_fast_eval() when RR option exists in the packet shows that the comparison fails because the assumption: nft_cmp_fast_init:189 priv->sreg=4 desc.len=8 mask=0xff000000 data.data[0]=0x10003e0 nft_cmp_fast_eval:57 regs->data[priv->sreg=4]=0x1 mask=0xff000000 priv->data=0x1000000 v2: use nft_reg_store8() instead (Florian Westphal). Also to avoid the warnings reported by kernel test robot. Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") Fixes: c078ca3b0c5b ("netfilter: nft_exthdr: Add support for existence check") Signed-off-by: Stephen Suryaputra Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 07782836fad6..3c48cdc8935d 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -44,7 +44,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = (err >= 0); + nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; @@ -141,7 +141,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr, err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type); if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = (err >= 0); + nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; -- cgit v1.2.3 From 2f941622fd88328ca75806c45c9e9709286a0609 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Aug 2020 20:28:01 +0200 Subject: netfilter: nft_compat: remove flush counter optimization WARNING: CPU: 1 PID: 16059 at lib/refcount.c:31 refcount_warn_saturate+0xdf/0xf [..] __nft_mt_tg_destroy+0x42/0x50 [nft_compat] nft_target_destroy+0x63/0x80 [nft_compat] nf_tables_expr_destroy+0x1b/0x30 [nf_tables] nf_tables_rule_destroy+0x3a/0x70 [nf_tables] nf_tables_exit_net+0x186/0x3d0 [nf_tables] Happens when a compat expr is destoyed from abort path. There is no functional impact; after this work queue is flushed unconditionally if its pending. This removes the waitcount optimization. Test of repeated iptables-restore of a ~60k kubernetes ruleset doesn't indicate a slowdown. In case the counter is needed after all for some workloads we can revert this and increment the refcount for the != NFT_PREPARE_TRANS case to avoid the increment/decrement imbalance. While at it, also flush for match case, this was an oversight in the original patch. Fixes: ffe8923f109b7e ("netfilter: nft_compat: make sure xtables destructors have run") Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6428856ccbec..8e56f353ff35 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,8 +27,6 @@ struct nft_xt_match_priv { void *info; }; -static refcount_t nft_compat_pending_destroy = REFCOUNT_INIT(1); - static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx, const char *tablename) { @@ -215,6 +213,17 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } +static void nft_compat_wait_for_destructors(void) +{ + /* xtables matches or targets can have side effects, e.g. + * creation/destruction of /proc files. + * The xt ->destroy functions are run asynchronously from + * work queue. If we have pending invocations we thus + * need to wait for those to finish. + */ + nf_tables_trans_destroy_flush_work(); +} + static int nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -238,14 +247,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - /* xtables matches or targets can have side effects, e.g. - * creation/destruction of /proc files. - * The xt ->destroy functions are run asynchronously from - * work queue. If we have pending invocations we thus - * need to wait for those to finish. - */ - if (refcount_read(&nft_compat_pending_destroy) > 1) - nf_tables_trans_destroy_flush_work(); + nft_compat_wait_for_destructors(); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) @@ -260,7 +262,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, static void __nft_mt_tg_destroy(struct module *me, const struct nft_expr *expr) { - refcount_dec(&nft_compat_pending_destroy); module_put(me); kfree(expr->ops); } @@ -468,6 +469,8 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + nft_compat_wait_for_destructors(); + return xt_check_match(&par, size, proto, inv); } @@ -716,14 +719,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static struct nft_expr_type nft_match_type; -static void nft_mt_tg_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - enum nft_trans_phase phase) -{ - if (phase == NFT_TRANS_COMMIT) - refcount_inc(&nft_compat_pending_destroy); -} - static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -762,7 +757,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, ops->type = &nft_match_type; ops->eval = nft_match_eval; ops->init = nft_match_init; - ops->deactivate = nft_mt_tg_deactivate, ops->destroy = nft_match_destroy; ops->dump = nft_match_dump; ops->validate = nft_match_validate; @@ -853,7 +847,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); ops->init = nft_target_init; ops->destroy = nft_target_destroy; - ops->deactivate = nft_mt_tg_deactivate, ops->dump = nft_target_dump; ops->validate = nft_target_validate; ops->data = target; @@ -917,8 +910,6 @@ static void __exit nft_compat_module_exit(void) nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); - - WARN_ON_ONCE(refcount_read(&nft_compat_pending_destroy) != 1); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); -- cgit v1.2.3 From 068d9d1eba72423e99162aad3586727180715c2a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 11 Aug 2020 19:29:23 -0700 Subject: bpf: Fix XDP FD-based attach/detach logic around XDP_FLAGS_UPDATE_IF_NOEXIST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enforce XDP_FLAGS_UPDATE_IF_NOEXIST only if new BPF program to be attached is non-NULL (i.e., we are not detaching a BPF program). Fixes: d4baa9368a5e ("bpf, xdp: Extract common XDP program attachment logic") Reported-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Tested-by: Stanislav Fomichev Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200812022923.1217922-1-andriin@fb.com --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7df6c9617321..b5d1129d8310 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8913,10 +8913,6 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Active program does not match expected"); return -EEXIST; } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } /* put effective new program into new_prog */ if (link) @@ -8927,6 +8923,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB ? XDP_MODE_DRV : XDP_MODE_SKB; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; + } if (!offload && dev_xdp_prog(dev, other_mode)) { NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; -- cgit v1.2.3 From 2404b73c3f1a5f15726c6ecd226b56f6f992767f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 10 Aug 2020 13:52:15 +0200 Subject: netfilter: avoid ipv6 -> nf_defrag_ipv6 module dependency nf_ct_frag6_gather is part of nf_defrag_ipv6.ko, not ipv6 core. The current use of the netfilter ipv6 stub indirections causes a module dependency between ipv6 and nf_defrag_ipv6. This prevents nf_defrag_ipv6 module from being removed because ipv6 can't be unloaded. Remove the indirection and always use a direct call. This creates a depency from nf_conntrack_bridge to nf_defrag_ipv6 instead: modinfo nf_conntrack depends: nf_conntrack,nf_defrag_ipv6,bridge .. and nf_conntrack already depends on nf_defrag_ipv6 anyway. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 18 ------------------ net/bridge/netfilter/nf_conntrack_bridge.c | 8 ++++++-- net/ipv6/netfilter.c | 3 --- 3 files changed, 6 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index aac42c28fe62..9b67394471e1 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -58,7 +58,6 @@ struct nf_ipv6_ops { int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); #if IS_MODULE(CONFIG_IPV6) - int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user); int (*br_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, @@ -117,23 +116,6 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, #include -static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb, - u32 user) -{ -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->br_defrag(net, skb, user); -#elif IS_BUILTIN(CONFIG_IPV6) - return nf_ct_frag6_gather(net, skb, user); -#else - return 1; -#endif -} - int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 809673222382..8d033a75a766 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -168,6 +168,7 @@ static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, const struct nf_hook_state *state) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) u16 zone_id = NF_CT_DEFAULT_ZONE_ID; enum ip_conntrack_info ctinfo; struct br_input_skb_cb cb; @@ -180,14 +181,17 @@ static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); - err = nf_ipv6_br_defrag(state->net, skb, - IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + err = nf_ct_frag6_gather(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); /* queued */ if (err == -EINPROGRESS) return NF_STOLEN; br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); return err == 0 ? NF_ACCEPT : NF_DROP; +#else + return NF_ACCEPT; +#endif } static int nf_ct_br_ip_check(const struct sk_buff *skb) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 409e79b84a83..6d0e942d082d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -245,9 +245,6 @@ static const struct nf_ipv6_ops ipv6ops = { .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, -#if IS_MODULE(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - .br_defrag = nf_ct_frag6_gather, -#endif #if IS_MODULE(CONFIG_IPV6) .br_fragment = br_ip6_fragment, #endif -- cgit v1.2.3 From 59136aa3b2649796a3a1fd90158675f1f640ce0e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 11 Aug 2020 19:39:09 +0200 Subject: netfilter: nf_tables: free chain context when BINDING flag is missing syzbot found a memory leak in nf_tables_addchain() because the chain object is not free'd correctly on error. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: syzbot+c99868fde67014f7e9f5@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d878e34e3354..fd814e514f94 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2018,8 +2018,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_NAME]) { chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); } else { - if (!(flags & NFT_CHAIN_BINDING)) - return -EINVAL; + if (!(flags & NFT_CHAIN_BINDING)) { + err = -EINVAL; + goto err1; + } snprintf(name, sizeof(name), "__chain%llu", ++chain_id); chain->name = kstrdup(name, GFP_KERNEL); -- cgit v1.2.3 From fd09af010788a884de1c39537c288830c3d305db Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:37 -0700 Subject: bpf: sock_ops ctx access may stomp registers in corner case I had a sockmap program that after doing some refactoring started spewing this splat at me: [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [...] [18610.807359] Call Trace: [18610.807370] ? 0xffffffffc114d0d5 [18610.807382] __cgroup_bpf_run_filter_sock_ops+0x7d/0xb0 [18610.807391] tcp_connect+0x895/0xd50 [18610.807400] tcp_v4_connect+0x465/0x4e0 [18610.807407] __inet_stream_connect+0xd6/0x3a0 [18610.807412] ? __inet_stream_connect+0x5/0x3a0 [18610.807417] inet_stream_connect+0x3b/0x60 [18610.807425] __sys_connect+0xed/0x120 After some debugging I was able to build this simple reproducer, __section("sockops/reproducer_bad") int bpf_reproducer_bad(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; return 0; } And along the way noticed that below program ran without splat, __section("sockops/reproducer_good") int bpf_reproducer_good(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; volatile __maybe_unused __u32 family; compiler_barrier(); family = skops->family; return 0; } So I decided to check out the code we generate for the above two programs and noticed each generates the BPF code you would expect, 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r1 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r1 ; return 0; 2: r0 = 0 3: exit 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r2 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r2 ; family = skops->family; 2: r1 = *(u32 *)(r1 + 20) 3: *(u32 *)(r10 - 8) = r1 ; return 0; 4: r0 = 0 5: exit So we get reasonable assembly, but still something was causing the null pointer dereference. So, we load the programs and dump the xlated version observing that line 0 above 'r* = *(u32 *)(r1 +96)' is going to be translated by the skops access helpers. int bpf_reproducer_bad(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) 3: (61) r1 = *(u32 *)(r1 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r1 ; return 0; 5: (b7) r0 = 0 6: (95) exit int bpf_reproducer_good(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r2 = *(u32 *)(r1 +28) 1: (15) if r2 == 0x0 goto pc+2 2: (79) r2 = *(u64 *)(r1 +0) 3: (61) r2 = *(u32 *)(r2 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r2 ; family = skops->family; 5: (79) r1 = *(u64 *)(r1 +0) 6: (69) r1 = *(u16 *)(r1 +16) ; family = skops->family; 7: (63) *(u32 *)(r10 -8) = r1 ; return 0; 8: (b7) r0 = 0 9: (95) exit Then we look at lines 0 and 2 above. In the good case we do the zero check in r2 and then load 'r1 + 0' at line 2. Do a quick cross-check into the bpf_sock_ops check and we can confirm that is the 'struct sock *sk' pointer field. But, in the bad case, 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) Oh no, we read 'r1 +28' into r1, this is skops->fullsock and then in line 2 we read the 'r1 +0' as a pointer. Now jumping back to our spat, [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 The 0x01 makes sense because that is exactly the fullsock value. And its not a valid dereference so we splat. To fix we need to guard the case when a program is doing a sock_ops field access with src_reg == dst_reg. This is already handled in the load case where the ctx_access handler uses a tmp register being careful to store the old value and restore it. To fix the get case test if src_reg == dst_reg and in this case do the is_fullsock test in the temporary register. Remembering to restore the temporary register before writing to either dst_reg or src_reg to avoid smashing the pointer into the struct holding the tmp variable. Adding this inline code to test_tcpbpf_kern will now be generated correctly from, 9: r2 = *(u32 *)(r2 + 96) to xlated code, 12: (7b) *(u64 *)(r2 +32) = r9 13: (61) r9 = *(u32 *)(r2 +28) 14: (15) if r9 == 0x0 goto pc+4 15: (79) r9 = *(u64 *)(r2 +32) 16: (79) r2 = *(u64 *)(r2 +0) 17: (61) r2 = *(u32 *)(r2 +2348) 18: (05) goto pc+1 19: (79) r9 = *(u64 *)(r2 +32) And in the normal case we keep the original code, because really this is an edge case. From this, 9: r2 = *(u32 *)(r6 + 96) to xlated code, 22: (61) r2 = *(u32 *)(r6 +28) 23: (15) if r2 == 0x0 goto pc+2 24: (79) r2 = *(u64 *)(r6 +0) 25: (61) r2 = *(u32 *)(r2 +2348) So three additional instructions if dst == src register, but I scanned my current code base and did not see this pattern anywhere so should not be a big deal. Further, it seems no one else has hit this or at least reported it so it must a fairly rare pattern. Fixes: 9b1f3d6e5af29 ("bpf: Refactor sock_ops_convert_ctx_access") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718347772.4728.2781381670567919577.stgit@john-Precision-5820-Tower --- net/core/filter.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 7124f0fe6974..1baeeff2fcd2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8317,15 +8317,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, /* Helper macro for adding read access to tcp_sock or sock fields. */ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ - si->dst_reg, si->src_reg, \ + fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ si->dst_reg, si->src_reg, \ @@ -8334,6 +8350,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, OBJ_FIELD), \ si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ } while (0) #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ -- cgit v1.2.3 From 84f44df664e9f0e261157e16ee1acd77cc1bb78d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:56 -0700 Subject: bpf: sock_ops sk access may stomp registers when dst_reg = src_reg Similar to patch ("bpf: sock_ops ctx access may stomp registers") if the src_reg = dst_reg when reading the sk field of a sock_ops struct we generate xlated code, 53: (61) r9 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 56: (79) r9 = *(u64 *)(r9 +0) This stomps on the r9 reg to do the sk_fullsock check and then when reading the skops->sk field instead of the sk pointer we get the sk_fullsock. To fix use similar pattern noted in the previous fix and use the temp field to save/restore a register used to do sk_fullsock check. After the fix the generated xlated code reads, 52: (7b) *(u64 *)(r9 +32) = r8 53: (61) r8 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 55: (79) r8 = *(u64 *)(r9 +32) 56: (79) r9 = *(u64 *)(r9 +0) 57: (05) goto pc+1 58: (79) r8 = *(u64 *)(r9 +32) Here r9 register was in-use so r8 is chosen as the temporary register. In line 52 r8 is saved in temp variable and at line 54 restored in case fullsock != 0. Finally we handle fullsock == 0 case by restoring at line 58. This adds a new macro SOCK_OPS_GET_SK it is almost possible to merge this with SOCK_OPS_GET_FIELD, but I found the extra branch logic a bit more confusing than just adding a new macro despite a bit of duplicating code. Fixes: 1314ef561102e ("bpf: export bpf_sock for BPF_PROG_TYPE_SOCK_OPS prog type") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718349653.4728.6559437186853473612.stgit@john-Precision-5820-Tower --- net/core/filter.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 1baeeff2fcd2..b2df52086445 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8358,6 +8358,43 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, } \ } while (0) +#define SOCK_OPS_GET_SK() \ + do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + fullsock_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ + } while (0) + #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) @@ -8642,17 +8679,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); break; case offsetof(struct bpf_sock_ops, sk): - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, - is_fullsock), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, - is_fullsock)); - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, sk), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, sk)); + SOCK_OPS_GET_SK(); break; } return insn - insn_buf; -- cgit v1.2.3 From 5c04da55c754c44937b3d19c6522f9023fd5c5d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Aug 2020 09:46:11 +0200 Subject: netfilter: ebtables: reject bogus getopt len value syzkaller reports splat: ------------[ cut here ]------------ Buffer overflow detected (80 < 137)! Call Trace: do_ebt_get_ctl+0x2b4/0x790 net/bridge/netfilter/ebtables.c:2317 nf_getsockopt+0x72/0xd0 net/netfilter/nf_sockopt.c:116 ip_getsockopt net/ipv4/ip_sockglue.c:1778 [inline] caused by a copy-to-user with a too-large "*len" value. This adds a argument check on *len just like in the non-compat version of the handler. Before the "Fixes" commit, the reproducer fails with -EINVAL as expected: 1. core calls the "compat" getsockopt version 2. compat getsockopt version detects the *len value is possibly in 64-bit layout (*len != compat_len) 3. compat getsockopt version delegates everything to native getsockopt version 4. native getsockopt rejects invalid *len -> compat handler only sees len == sizeof(compat_struct) for GET_ENTRIES. After the refactor, event sequence is: 1. getsockopt calls "compat" version (len != native_len) 2. compat version attempts to copy *len bytes, where *len is random value from userspace Fixes: fc66de8e16ec ("netfilter/ebtables: clean up compat {get, set}sockopt handling") Reported-by: syzbot+5accb5c62faa1d346480@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Christoph Hellwig Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1641f414d1ba..ebe33b60efd6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2238,6 +2238,10 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, struct ebt_table *t; struct net *net = sock_net(sk); + if ((cmd == EBT_SO_GET_INFO || cmd == EBT_SO_GET_INIT_INFO) && + *len != sizeof(struct compat_ebt_replace)) + return -EINVAL; + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; -- cgit v1.2.3 From 38ba8b9241f5848a49b80fddac9ab5f4692e434e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2020 09:18:34 -0700 Subject: can: j1939: fix kernel-infoleak in j1939_sk_sock2sockaddr_can() syzbot found that at least 2 bytes of kernel information were leaked during getsockname() on AF_CAN CAN_J1939 socket. Since struct sockaddr_can has in fact two holes, simply clear the whole area before filling it with useful data. BUG: KMSAN: kernel-infoleak in kmsan_copy_to_user+0x81/0x90 mm/kmsan/kmsan_hooks.c:253 CPU: 0 PID: 8466 Comm: syz-executor511 Not tainted 5.8.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121 kmsan_internal_check_memory+0x238/0x3d0 mm/kmsan/kmsan.c:423 kmsan_copy_to_user+0x81/0x90 mm/kmsan/kmsan_hooks.c:253 instrument_copy_to_user include/linux/instrumented.h:91 [inline] _copy_to_user+0x18e/0x260 lib/usercopy.c:39 copy_to_user include/linux/uaccess.h:186 [inline] move_addr_to_user+0x3de/0x670 net/socket.c:237 __sys_getsockname+0x407/0x5e0 net/socket.c:1909 __do_sys_getsockname net/socket.c:1920 [inline] __se_sys_getsockname+0x91/0xb0 net/socket.c:1917 __x64_sys_getsockname+0x4a/0x70 net/socket.c:1917 do_syscall_64+0xad/0x160 arch/x86/entry/common.c:386 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x440219 Code: Bad RIP value. RSP: 002b:00007ffe5ee150c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000033 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440219 RDX: 0000000020000240 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401a20 R13: 0000000000401ab0 R14: 0000000000000000 R15: 0000000000000000 Local variable ----address@__sys_getsockname created at: __sys_getsockname+0x91/0x5e0 net/socket.c:1894 __sys_getsockname+0x91/0x5e0 net/socket.c:1894 Bytes 2-3 of 24 are uninitialized Memory access of size 24 starts at ffff8880ba2c7de8 Data copied to user address 0000000020000100 Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Robin van der Gracht Cc: Oleksij Rempel Cc: Pengutronix Kernel Team Cc: linux-can@vger.kernel.org Acked-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200813161834.4021638-1-edumazet@google.com Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 78ff9b3f1d40..b634b680177f 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -553,6 +553,11 @@ static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr, static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr, const struct j1939_sock *jsk, int peer) { + /* There are two holes (2 bytes and 3 bytes) to clear to avoid + * leaking kernel information to user space. + */ + memset(addr, 0, J1939_MIN_NAMELEN); + addr->can_family = AF_CAN; addr->can_ifindex = jsk->ifindex; addr->can_addr.j1939.pgn = jsk->addr.pgn; -- cgit v1.2.3 From b43e3a82bc432c1caaed8950e7662c143470c54c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Aug 2020 12:51:56 +0200 Subject: can: j1939: transport: j1939_simple_recv(): ignore local J1939 messages send not by J1939 stack In current J1939 stack implementation, we process all locally send messages as own messages. Even if it was send by CAN_RAW socket. To reproduce it use following commands: testj1939 -P -r can0:0x80 & cansend can0 18238040#0123 This step will trigger false positive not critical warning: j1939_simple_recv: Received already invalidated message With this patch we add additional check to make sure, related skb is own echo message. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200807105200.26441-2-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 1 + net/can/j1939/transport.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b634b680177f..ad973370de12 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -398,6 +398,7 @@ static int j1939_sk_init(struct sock *sk) spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); sk->sk_destruct = j1939_sk_sock_destruct; + sk->sk_protocol = CAN_J1939; return 0; } diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 9f99af5b0b11..b135c5e2a86e 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2017,6 +2017,10 @@ void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb) if (!skb->sk) return; + if (skb->sk->sk_family != AF_CAN || + skb->sk->sk_protocol != CAN_J1939) + return; + j1939_session_list_lock(priv); session = j1939_session_get_simple(priv, skb); j1939_session_list_unlock(priv); -- cgit v1.2.3 From cd3b3636c99fcac52c598b64061f3fe4413c6a12 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Aug 2020 12:51:57 +0200 Subject: can: j1939: transport: j1939_session_tx_dat(): fix use-after-free read in j1939_tp_txtimer() The current stack implementation do not support ECTS requests of not aligned TP sized blocks. If ECTS will request a block with size and offset spanning two TP blocks, this will cause memcpy() to read beyond the queued skb (which does only contain one TP sized block). Sometimes KASAN will detect this read if the memory region beyond the skb was previously allocated and freed. In other situations it will stay undetected. The ETP transfer in any case will be corrupted. This patch adds a sanity check to avoid this kind of read and abort the session with error J1939_XTP_ABORT_ECTS_TOO_BIG. Reported-by: syzbot+5322482fe520b02aea30@syzkaller.appspotmail.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: linux-stable # >= v5.4 Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200807105200.26441-3-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index b135c5e2a86e..30957c9a8eb7 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -787,6 +787,18 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (len > 7) len = 7; + if (offset + len > se_skb->len) { + netdev_err_once(priv->ndev, + "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n", + __func__, session, skcb->offset, se_skb->len , session->pkt.tx); + return -EOVERFLOW; + } + + if (!len) { + ret = -ENOBUFS; + break; + } + memcpy(&dat[1], &tpdat[offset], len); ret = j1939_tp_tx_dat(session, dat, len + 1); if (ret < 0) { @@ -1120,6 +1132,9 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) * cleanup including propagation of the error to user space. */ break; + case -EOVERFLOW: + j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG); + break; case 0: session->tx_retry = 0; break; -- cgit v1.2.3 From af804b7826350d5af728dca4715e473338fbd7e5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Aug 2020 12:51:58 +0200 Subject: can: j1939: socket: j1939_sk_bind(): make sure ml_priv is allocated This patch adds check to ensure that the struct net_device::ml_priv is allocated, as it is used later by the j1939 stack. The allocation is done by all mainline CAN network drivers, but when using bond or team devices this is not the case. Bail out if no ml_priv is allocated. Reported-by: syzbot+f03d384f3455d28833eb@syzkaller.appspotmail.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: linux-stable # >= v5.4 Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200807105200.26441-4-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index ad973370de12..b93876c57fc4 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -467,6 +467,14 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out_release_sock; } + if (!ndev->ml_priv) { + netdev_warn_once(ndev, + "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + priv = j1939_netdev_start(ndev); dev_put(ndev); if (IS_ERR(priv)) { -- cgit v1.2.3 From 840835c9281215341d84966a8855f267a971e6a3 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Aug 2020 12:51:59 +0200 Subject: can: j1939: transport: add j1939_session_skb_find_by_offset() function Sometimes it makes no sense to search the skb by pkt.dpo, since we need next the skb within the transaction block. This may happen if we have an ETP session with CTS set to less than 255 packets. After this patch, we will be able to work with ETP sessions where the block size (ETP.CM_CTS byte 2) is less than 255 packets. Reported-by: Henrique Figueira Reported-by: https://github.com/linux-can/can-utils/issues/228 Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200807105200.26441-5-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 30957c9a8eb7..90a2baac8a4a 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -352,17 +352,16 @@ void j1939_session_skb_queue(struct j1939_session *session, skb_queue_tail(&session->skb_queue, skb); } -static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +static struct +sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session, + unsigned int offset_start) { struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *do_skcb; struct sk_buff *skb = NULL; struct sk_buff *do_skb; - struct j1939_sk_buff_cb *do_skcb; - unsigned int offset_start; unsigned long flags; - offset_start = session->pkt.dpo * 7; - spin_lock_irqsave(&session->skb_queue.lock, flags); skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); @@ -382,6 +381,14 @@ static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) return skb; } +static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +{ + unsigned int offset_start; + + offset_start = session->pkt.dpo * 7; + return j1939_session_skb_find_by_offset(session, offset_start); +} + /* see if we are receiver * returns 0 for broadcasts, although we will receive them */ @@ -766,7 +773,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) int ret = 0; u8 dat[8]; - se_skb = j1939_session_skb_find(session); + se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7); if (!se_skb) return -ENOBUFS; @@ -1765,7 +1772,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, __func__, session); goto out_session_cancel; } - se_skb = j1939_session_skb_find(session); + + se_skb = j1939_session_skb_find_by_offset(session, packet * 7); if (!se_skb) { netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, session); -- cgit v1.2.3 From e052d0540298bfe0f6cbbecdc7e2ea9b859575b2 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Aug 2020 12:52:00 +0200 Subject: can: j1939: transport: j1939_xtp_rx_dat_one(): compare own packets to detect corruptions Since the stack relays on receiving own packets, it was overwriting own transmit buffer from received packets. At least theoretically, the received echo buffer can be corrupt or changed and the session partner can request to resend previous data. In this case we will re-send bad data. With this patch we will stop to overwrite own TX buffer and use it for sanity checking. Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20200807105200.26441-6-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 90a2baac8a4a..5cf107cb447c 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1792,7 +1792,20 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, } tpdat = se_skb->data; - memcpy(&tpdat[offset], &dat[1], nbytes); + if (!session->transmission) { + memcpy(&tpdat[offset], &dat[1], nbytes); + } else { + int err; + + err = memcmp(&tpdat[offset], &dat[1], nbytes); + if (err) + netdev_err_once(priv->ndev, + "%s: 0x%p: Data of RX-looped back packet (%*ph) doesn't match TX data (%*ph)!\n", + __func__, session, + nbytes, &dat[1], + nbytes, &tpdat[offset]); + } + if (packet == session->pkt.rx) session->pkt.rx++; -- cgit v1.2.3 From 35759383133f64d90eba120a0d3efe8f71241650 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 14 Aug 2020 15:56:34 +0200 Subject: mptcp: sendmsg: reset iter on error Once we've copied data from the iterator we need to revert in case we end up not sending any data. This bug doesn't trigger with normal 'poll' based tests, because we only feed a small chunk of data to kernel after poll indicated POLLOUT. With blocking IO and large writes this triggers. Receiver ends up with less data than it should get. Fixes: 72511aab95c94d ("mptcp: avoid blocking in tcp_sendpages") Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8c1d1a595701..c84b4051c2a4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -725,8 +725,10 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (!psize) return -EINVAL; - if (!sk_wmem_schedule(sk, psize + dfrag->overhead)) + if (!sk_wmem_schedule(sk, psize + dfrag->overhead)) { + iov_iter_revert(&msg->msg_iter, psize); return -ENOMEM; + } } else { offset = dfrag->offset; psize = min_t(size_t, dfrag->data_len, avail_size); @@ -737,8 +739,10 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, */ ret = do_tcp_sendpages(ssk, page, offset, psize, msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT); - if (ret <= 0) + if (ret <= 0) { + iov_iter_revert(&msg->msg_iter, psize); return ret; + } frag_truesize += ret; if (!retransmission) { -- cgit v1.2.3 From f4fd77fd87e9b214c26bb2ebd4f90055eaea5ade Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 5 Aug 2020 11:50:22 +0800 Subject: can: j1939: fix support for multipacket broadcast message Currently j1939_tp_im_involved_anydir() in j1939_tp_recv() check the previously set flags J1939_ECU_LOCAL_DST and J1939_ECU_LOCAL_SRC of incoming skb, thus multipacket broadcast message was aborted by receive side because it may come from remote ECUs and have no exact dst address. Similarly, j1939_tp_cmd_recv() and j1939_xtp_rx_dat() didn't process broadcast message. So fix it by checking and process broadcast message in j1939_tp_recv(), j1939_tp_cmd_recv() and j1939_xtp_rx_dat(). Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1596599425-5534-2-git-send-email-zhangchangzhong@huawei.com Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 5cf107cb447c..868ecb2bfa45 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1673,8 +1673,12 @@ static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, return; } session = j1939_xtp_rx_rts_session_new(priv, skb); - if (!session) + if (!session) { + if (cmd == J1939_TP_CMD_BAM && j1939_sk_recv_match(priv, skcb)) + netdev_info(priv->ndev, "%s: failed to create TP BAM session\n", + __func__); return; + } } else { if (j1939_xtp_rx_rts_session_active(session, skb)) { j1939_session_put(session); @@ -1865,6 +1869,13 @@ static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb) else j1939_xtp_rx_dat_one(session, skb); } + + if (j1939_cb_is_broadcast(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + false); + if (session) + j1939_xtp_rx_dat_one(session, skb); + } } /* j1939 main intf */ @@ -1956,7 +1967,7 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_rts(priv, skb, true); - if (j1939_tp_im_receiver(skcb)) + if (j1939_tp_im_receiver(skcb) || j1939_cb_is_broadcast(skcb)) j1939_xtp_rx_rts(priv, skb, false); break; @@ -2020,7 +2031,7 @@ int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); - if (!j1939_tp_im_involved_anydir(skcb)) + if (!j1939_tp_im_involved_anydir(skcb) && !j1939_cb_is_broadcast(skcb)) return 0; switch (skcb->addr.pgn) { -- cgit v1.2.3 From e8b17653088f28a87c81845fa41a2d295a3b458c Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 5 Aug 2020 11:50:23 +0800 Subject: can: j1939: cancel rxtimer on multipacket broadcast session complete If j1939_xtp_rx_dat_one() receive last frame of multipacket broadcast message, j1939_session_timers_cancel() should be called to cancel rxtimer. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1596599425-5534-3-git-send-email-zhangchangzhong@huawei.com Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 868ecb2bfa45..2f3c3afd5071 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1824,6 +1824,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, } if (final) { + j1939_session_timers_cancel(session); j1939_session_completed(session); } else if (do_cts_eoma) { j1939_tp_set_rxtimeout(session, 1250); -- cgit v1.2.3 From 2b8b2e31555cf55ba3680fb28e2b382e168d7ea1 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 5 Aug 2020 11:50:24 +0800 Subject: can: j1939: abort multipacket broadcast session when timeout occurs If timeout occurs, j1939_tp_rxtimer() first calls hrtimer_start() to restart rxtimer, and then calls __j1939_session_cancel() to set session->state = J1939_SESSION_WAITING_ABORT. At next timeout expiration, because of the J1939_SESSION_WAITING_ABORT session state j1939_tp_rxtimer() will call j1939_session_deactivate_activate_next() to deactivate current session, and rxtimer won't be set. But for multipacket broadcast session, __j1939_session_cancel() don't set session->state = J1939_SESSION_WAITING_ABORT, thus current session won't be deactivate and hrtimer_start() is called to start new rxtimer again and again. So fix it by moving session->state = J1939_SESSION_WAITING_ABORT out of if (!j1939_cb_is_broadcast(&session->skcb)) statement. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1596599425-5534-4-git-send-email-zhangchangzhong@huawei.com Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 2f3c3afd5071..047118d5270b 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1074,9 +1074,9 @@ static void __j1939_session_cancel(struct j1939_session *session, lockdep_assert_held(&session->priv->active_session_list_lock); session->err = j1939_xtp_abort_to_errno(priv, err); + session->state = J1939_SESSION_WAITING_ABORT; /* do not send aborts on incoming broadcasts */ if (!j1939_cb_is_broadcast(&session->skcb)) { - session->state = J1939_SESSION_WAITING_ABORT; j1939_xtp_tx_abort(priv, &session->skcb, !session->transmission, err, session->skcb.addr.pgn); -- cgit v1.2.3 From 0ae18a82686f9b9965a8ce0dd81371871b306ffe Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 5 Aug 2020 11:50:25 +0800 Subject: can: j1939: add rxtimer for multipacket broadcast session According to SAE J1939/21 (Chapter 5.12.3 and APPENDIX C), for transmit side the required time interval between packets of a multipacket broadcast message is 50 to 200 ms, the responder shall use a timeout of 250ms (provides margin allowing for the maximumm spacing of 200ms). For receive side a timeout will occur when a time of greater than 750 ms elapsed between two message packets when more packets were expected. So this patch fix and add rxtimer for multipacket broadcast session. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1596599425-5534-5-git-send-email-zhangchangzhong@huawei.com Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 047118d5270b..a8dd956b5e8e 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -723,10 +723,12 @@ static int j1939_session_tx_rts(struct j1939_session *session) return ret; session->last_txcmd = dat[0]; - if (dat[0] == J1939_TP_CMD_BAM) + if (dat[0] == J1939_TP_CMD_BAM) { j1939_tp_schedule_txtimer(session, 50); - - j1939_tp_set_rxtimeout(session, 1250); + j1939_tp_set_rxtimeout(session, 250); + } else { + j1939_tp_set_rxtimeout(session, 1250); + } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); @@ -1687,11 +1689,15 @@ static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, } session->last_cmd = cmd; - j1939_tp_set_rxtimeout(session, 1250); - - if (cmd != J1939_TP_CMD_BAM && !session->transmission) { - j1939_session_txtimer_cancel(session); - j1939_tp_schedule_txtimer(session, 0); + if (cmd == J1939_TP_CMD_BAM) { + if (!session->transmission) + j1939_tp_set_rxtimeout(session, 750); + } else { + if (!session->transmission) { + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + j1939_tp_set_rxtimeout(session, 1250); } j1939_session_put(session); @@ -1742,6 +1748,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, int offset; int nbytes; bool final = false; + bool remain = false; bool do_cts_eoma = false; int packet; @@ -1817,6 +1824,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, j1939_cb_is_broadcast(&session->skcb)) { if (session->pkt.rx >= session->pkt.total) final = true; + else + remain = true; } else { /* never final, an EOMA must follow */ if (session->pkt.rx >= session->pkt.last) @@ -1826,6 +1835,9 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (final) { j1939_session_timers_cancel(session); j1939_session_completed(session); + } else if (remain) { + if (!session->transmission) + j1939_tp_set_rxtimeout(session, 750); } else if (do_cts_eoma) { j1939_tp_set_rxtimeout(session, 1250); if (!session->transmission) -- cgit v1.2.3 From f8414a8d886b613b90d9fdf7cda6feea313b1069 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 15 Aug 2020 09:29:30 +0200 Subject: net: xdp: pull ethernet header off packet after computing skb->protocol When an XDP program changes the ethernet header protocol field, eth_type_trans is used to recalculate skb->protocol. In order for eth_type_trans to work correctly, the ethernet header must actually be part of the skb data segment, so the code first pushes that onto the head of the skb. However, it subsequently forgets to pull it back off, making the behavior of the passed-on packet inconsistent between the protocol modifying case and the static protocol case. This patch fixes the issue by simply pulling the ethernet header back off of the skb head. Fixes: 297249569932 ("net: fix generic XDP to handle if eth header was mangled") Cc: Jesper Dangaard Brouer Cc: David S. Miller Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b5d1129d8310..7592d7dd6109 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4676,6 +4676,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) { __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); + __skb_pull(skb, ETH_HLEN); } switch (act) { -- cgit v1.2.3 From 55eff0eb7460c3d50716ed9eccf22257b046ca92 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 15 Aug 2020 04:44:31 -0400 Subject: net: Fix potential wrong skb->protocol in skb_vlan_untag() We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). So we should pull VLAN_HLEN + sizeof(unsigned short) in skb_vlan_untag() or we may access the wrong data. Fixes: 0d5501c1c828 ("net: Always untag vlan-tagged traffic on input.") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7e2e502ef519..5c3b906aeef3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5418,8 +5418,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */ + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short)))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; -- cgit v1.2.3 From 47733f9daf4fe4f7e0eb9e273f21ad3a19130487 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 15 Aug 2020 16:29:15 -0700 Subject: tipc: fix uninit skb->data in tipc_nl_compat_dumpit() __tipc_nl_compat_dumpit() has two callers, and it expects them to pass a valid nlmsghdr via arg->data. This header is artificial and crafted just for __tipc_nl_compat_dumpit(). tipc_nl_compat_publ_dump() does so by putting a genlmsghdr as well as some nested attribute, TIPC_NLA_SOCK. But the other caller tipc_nl_compat_dumpit() does not, this leaves arg->data uninitialized on this call path. Fix this by just adding a similar nlmsghdr without any payload in tipc_nl_compat_dumpit(). This bug exists since day 1, but the recent commit 6ea67769ff33 ("net: tipc: prepare attrs in __tipc_nl_compat_dumpit()") makes it easier to appear. Reported-and-tested-by: syzbot+0e7181deafa7e0b79923@syzkaller.appspotmail.com Fixes: d0796d1ef63d ("tipc: convert legacy nl bearer dump to nl compat") Cc: Jon Maloy Cc: Ying Xue Cc: Richard Alpe Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 217516357ef2..90e3c70a91ad 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -275,8 +275,9 @@ err_out: static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg) { - int err; + struct nlmsghdr *nlh; struct sk_buff *arg; + int err; if (msg->req_type && (!msg->req_size || !TLV_CHECK_TYPE(msg->req, msg->req_type))) @@ -305,6 +306,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } + nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI); + if (!nlh) { + kfree_skb(arg); + kfree_skb(msg->rep); + msg->rep = NULL; + return -EMSGSIZE; + } + nlmsg_end(arg, nlh); + err = __tipc_nl_compat_dumpit(cmd, msg, arg); if (err) { kfree_skb(msg->rep); -- cgit v1.2.3 From c530189905efe91b6a464db4ec1b56b4c069609f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 16 Aug 2020 17:32:03 +0800 Subject: tipc: not enable tipc when ipv6 works as a module When using ipv6_dev_find() in one module, it requires ipv6 not to work as a module. Otherwise, this error occurs in build: undefined reference to `ipv6_dev_find'. So fix it by adding "depends on IPV6 || IPV6=n" to tipc/Kconfig, as it does in sctp/Kconfig. Fixes: 5a6f6f579178 ("tipc: set ub->ifindex for local ipv6 address") Reported-by: kernel test robot Acked-by: Randy Dunlap Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 9dd780215eef..be1c4003d67d 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -6,6 +6,7 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET + depends on IPV6 || IPV6=n help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol -- cgit v1.2.3 From bd71ea60673180eccf17b1a1dda3504a04783789 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 16 Aug 2020 21:26:38 +0200 Subject: net: devlink: Remove overzealous WARN_ON with snapshots It is possible to trigger this WARN_ON from user space by triggering a devlink snapshot with an ID which already exists. We don't need both -EEXISTS being reported and spamming the kernel log. Signed-off-by: Andrew Lunn Tested-by: Chris Healy Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index e674f0f46dc2..e5feb87beca7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4063,7 +4063,7 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { lockdep_assert_held(&devlink->lock); - if (WARN_ON(xa_load(&devlink->snapshot_ids, id))) + if (xa_load(&devlink->snapshot_ids, id)) return -EEXIST; return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), -- cgit v1.2.3 From b3b2854dcf704c1db05d897072f98e8b79398af1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 16 Aug 2020 23:14:20 +0200 Subject: mptcp: sendmsg: reset iter on error redux This fix wasn't correct: When this function is invoked from the retransmission worker, the iterator contains garbage and resetting it causes a crash. As the work queue should not be performance critical also zero the msghdr struct. Fixes: 35759383133f64d "(mptcp: sendmsg: reset iter on error)" Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c84b4051c2a4..1aad411a0e46 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -740,7 +740,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, ret = do_tcp_sendpages(ssk, page, offset, psize, msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT); if (ret <= 0) { - iov_iter_revert(&msg->msg_iter, psize); + if (!retransmission) + iov_iter_revert(&msg->msg_iter, psize); return ret; } @@ -1392,7 +1393,9 @@ static void mptcp_worker(struct work_struct *work) struct mptcp_data_frag *dfrag; u64 orig_write_seq; size_t copied = 0; - struct msghdr msg; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT, + }; long timeo = 0; lock_sock(sk); @@ -1425,7 +1428,6 @@ static void mptcp_worker(struct work_struct *work) lock_sock(ssk); - msg.msg_flags = MSG_DONTWAIT; orig_len = dfrag->data_len; orig_offset = dfrag->offset; orig_write_seq = dfrag->data_seq; -- cgit v1.2.3 From 7f9bf6e82461b97ce43a912cb4a959c5a41367ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 17 Aug 2020 11:48:05 -0700 Subject: Revert "net: xdp: pull ethernet header off packet after computing skb->protocol" This reverts commit f8414a8d886b613b90d9fdf7cda6feea313b1069. eth_type_trans() does the necessary pull on the skb. Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7592d7dd6109..b5d1129d8310 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4676,7 +4676,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) { __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); - __skb_pull(skb, ETH_HLEN); } switch (act) { -- cgit v1.2.3 From 8dfddfb79653df7c38a9c8c4c034f242a36acee9 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Mon, 17 Aug 2020 15:54:48 +0000 Subject: net: qrtr: fix usage of idr in port assignment to socket Passing large uint32 sockaddr_qrtr.port numbers for port allocation triggers a warning within idr_alloc() since the port number is cast to int, and thus interpreted as a negative number. This leads to the rejection of such valid port numbers in qrtr_port_assign() as idr_alloc() fails. To avoid the problem, switch to idr_alloc_u32() instead. Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") Reported-by: syzbot+f31428628ef672716ea8@syzkaller.appspotmail.com Signed-off-by: Necip Fazil Yildiran Reviewed-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b4c0db0b7d31..90c558f89d46 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -692,23 +692,25 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { + u32 min_port; int rc; mutex_lock(&qrtr_port_lock); if (!*port) { - rc = idr_alloc(&qrtr_ports, ipc, - QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, - GFP_ATOMIC); - if (rc >= 0) - *port = rc; + min_port = QRTR_MIN_EPH_SOCKET; + rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC); + if (!rc) + *port = min_port; } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { - rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); + min_port = 0; + rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC); } else { - rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); - if (rc >= 0) - *port = rc; + min_port = *port; + rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC); + if (!rc) + *port = min_port; } mutex_unlock(&qrtr_port_lock); -- cgit v1.2.3