From b428336676dbca363262cc134b6218205df4f530 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Tue, 4 Aug 2020 17:44:09 -0400
Subject: netfilter: nf_tables: nft_exthdr: the presence return value should be
 little-endian

On big-endian machine, the returned register data when the exthdr is
present is not being compared correctly because little-endian is
assumed. The function nft_cmp_fast_mask(), called by nft_cmp_fast_eval()
and nft_cmp_fast_init(), calls cpu_to_le32().

The following dump also shows that little endian is assumed:

$ nft --debug=netlink add rule ip recordroute forward ip option rr exists counter
ip
  [ exthdr load ipv4 1b @ 7 + 0 present => reg 1 ]
  [ cmp eq reg 1 0x01000000 ]
  [ counter pkts 0 bytes 0 ]

Lastly, debug print in nft_cmp_fast_init() and nft_cmp_fast_eval() when
RR option exists in the packet shows that the comparison fails because
the assumption:

nft_cmp_fast_init:189 priv->sreg=4 desc.len=8 mask=0xff000000 data.data[0]=0x10003e0
nft_cmp_fast_eval:57 regs->data[priv->sreg=4]=0x1 mask=0xff000000 priv->data=0x1000000

v2: use nft_reg_store8() instead (Florian Westphal). Also to avoid the
    warnings reported by kernel test robot.

Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
Fixes: c078ca3b0c5b ("netfilter: nft_exthdr: Add support for existence check")
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_exthdr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 07782836fad6..3c48cdc8935d 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -44,7 +44,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
 
 	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
 	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
-		*dest = (err >= 0);
+		nft_reg_store8(dest, err >= 0);
 		return;
 	} else if (err < 0) {
 		goto err;
@@ -141,7 +141,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
 
 	err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
 	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
-		*dest = (err >= 0);
+		nft_reg_store8(dest, err >= 0);
 		return;
 	} else if (err < 0) {
 		goto err;
-- 
cgit v1.2.3


From 2f941622fd88328ca75806c45c9e9709286a0609 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 9 Aug 2020 20:28:01 +0200
Subject: netfilter: nft_compat: remove flush counter optimization

WARNING: CPU: 1 PID: 16059 at lib/refcount.c:31 refcount_warn_saturate+0xdf/0xf
[..]
 __nft_mt_tg_destroy+0x42/0x50 [nft_compat]
 nft_target_destroy+0x63/0x80 [nft_compat]
 nf_tables_expr_destroy+0x1b/0x30 [nf_tables]
 nf_tables_rule_destroy+0x3a/0x70 [nf_tables]
 nf_tables_exit_net+0x186/0x3d0 [nf_tables]

Happens when a compat expr is destoyed from abort path.
There is no functional impact; after this work queue is flushed
unconditionally if its pending.

This removes the waitcount optimization.  Test of repeated
iptables-restore of a ~60k kubernetes ruleset doesn't indicate
a slowdown.  In case the counter is needed after all for some workloads
we can revert this and increment the refcount for the
!= NFT_PREPARE_TRANS case to avoid the increment/decrement imbalance.

While at it, also flush for match case, this was an oversight
in the original patch.

Fixes: ffe8923f109b7e ("netfilter: nft_compat: make sure xtables destructors have run")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 6428856ccbec..8e56f353ff35 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -27,8 +27,6 @@ struct nft_xt_match_priv {
 	void *info;
 };
 
-static refcount_t nft_compat_pending_destroy = REFCOUNT_INIT(1);
-
 static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
 						const char *tablename)
 {
@@ -215,6 +213,17 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
 	return 0;
 }
 
+static void nft_compat_wait_for_destructors(void)
+{
+	/* xtables matches or targets can have side effects, e.g.
+	 * creation/destruction of /proc files.
+	 * The xt ->destroy functions are run asynchronously from
+	 * work queue.  If we have pending invocations we thus
+	 * need to wait for those to finish.
+	 */
+	nf_tables_trans_destroy_flush_work();
+}
+
 static int
 nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		const struct nlattr * const tb[])
@@ -238,14 +247,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 
 	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
 
-	/* xtables matches or targets can have side effects, e.g.
-	 * creation/destruction of /proc files.
-	 * The xt ->destroy functions are run asynchronously from
-	 * work queue.  If we have pending invocations we thus
-	 * need to wait for those to finish.
-	 */
-	if (refcount_read(&nft_compat_pending_destroy) > 1)
-		nf_tables_trans_destroy_flush_work();
+	nft_compat_wait_for_destructors();
 
 	ret = xt_check_target(&par, size, proto, inv);
 	if (ret < 0)
@@ -260,7 +262,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 
 static void __nft_mt_tg_destroy(struct module *me, const struct nft_expr *expr)
 {
-	refcount_dec(&nft_compat_pending_destroy);
 	module_put(me);
 	kfree(expr->ops);
 }
@@ -468,6 +469,8 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 
 	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
 
+	nft_compat_wait_for_destructors();
+
 	return xt_check_match(&par, size, proto, inv);
 }
 
@@ -716,14 +719,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
 
 static struct nft_expr_type nft_match_type;
 
-static void nft_mt_tg_deactivate(const struct nft_ctx *ctx,
-				 const struct nft_expr *expr,
-				 enum nft_trans_phase phase)
-{
-	if (phase == NFT_TRANS_COMMIT)
-		refcount_inc(&nft_compat_pending_destroy);
-}
-
 static const struct nft_expr_ops *
 nft_match_select_ops(const struct nft_ctx *ctx,
 		     const struct nlattr * const tb[])
@@ -762,7 +757,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	ops->type = &nft_match_type;
 	ops->eval = nft_match_eval;
 	ops->init = nft_match_init;
-	ops->deactivate = nft_mt_tg_deactivate,
 	ops->destroy = nft_match_destroy;
 	ops->dump = nft_match_dump;
 	ops->validate = nft_match_validate;
@@ -853,7 +847,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
 	ops->init = nft_target_init;
 	ops->destroy = nft_target_destroy;
-	ops->deactivate = nft_mt_tg_deactivate,
 	ops->dump = nft_target_dump;
 	ops->validate = nft_target_validate;
 	ops->data = target;
@@ -917,8 +910,6 @@ static void __exit nft_compat_module_exit(void)
 	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
 	nft_unregister_expr(&nft_target_type);
 	nft_unregister_expr(&nft_match_type);
-
-	WARN_ON_ONCE(refcount_read(&nft_compat_pending_destroy) != 1);
 }
 
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
-- 
cgit v1.2.3


From 068d9d1eba72423e99162aad3586727180715c2a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 11 Aug 2020 19:29:23 -0700
Subject: bpf: Fix XDP FD-based attach/detach logic around
 XDP_FLAGS_UPDATE_IF_NOEXIST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enforce XDP_FLAGS_UPDATE_IF_NOEXIST only if new BPF program to be attached is
non-NULL (i.e., we are not detaching a BPF program).

Fixes: d4baa9368a5e ("bpf, xdp: Extract common XDP program attachment logic")
Reported-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200812022923.1217922-1-andriin@fb.com
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7df6c9617321..b5d1129d8310 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8913,10 +8913,6 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 		NL_SET_ERR_MSG(extack, "Active program does not match expected");
 		return -EEXIST;
 	}
-	if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
-		NL_SET_ERR_MSG(extack, "XDP program already attached");
-		return -EBUSY;
-	}
 
 	/* put effective new program into new_prog */
 	if (link)
@@ -8927,6 +8923,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 		enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
 					       ? XDP_MODE_DRV : XDP_MODE_SKB;
 
+		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+			NL_SET_ERR_MSG(extack, "XDP program already attached");
+			return -EBUSY;
+		}
 		if (!offload && dev_xdp_prog(dev, other_mode)) {
 			NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
 			return -EEXIST;
-- 
cgit v1.2.3


From 2404b73c3f1a5f15726c6ecd226b56f6f992767f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 10 Aug 2020 13:52:15 +0200
Subject: netfilter: avoid ipv6 -> nf_defrag_ipv6 module dependency

nf_ct_frag6_gather is part of nf_defrag_ipv6.ko, not ipv6 core.

The current use of the netfilter ipv6 stub indirections  causes a module
dependency between ipv6 and nf_defrag_ipv6.

This prevents nf_defrag_ipv6 module from being removed because ipv6 can't
be unloaded.

Remove the indirection and always use a direct call.  This creates a
depency from nf_conntrack_bridge to nf_defrag_ipv6 instead:

modinfo nf_conntrack
depends:        nf_conntrack,nf_defrag_ipv6,bridge

.. and nf_conntrack already depends on nf_defrag_ipv6 anyway.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h             | 18 ------------------
 net/bridge/netfilter/nf_conntrack_bridge.c |  8 ++++++--
 net/ipv6/netfilter.c                       |  3 ---
 3 files changed, 6 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index aac42c28fe62..9b67394471e1 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -58,7 +58,6 @@ struct nf_ipv6_ops {
 			int (*output)(struct net *, struct sock *, struct sk_buff *));
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
 #if IS_MODULE(CONFIG_IPV6)
-	int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
 	int (*br_fragment)(struct net *net, struct sock *sk,
 			   struct sk_buff *skb,
 			   struct nf_bridge_frag_data *data,
@@ -117,23 +116,6 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
 
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
-static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
-				    u32 user)
-{
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return 1;
-
-	return v6_ops->br_defrag(net, skb, user);
-#elif IS_BUILTIN(CONFIG_IPV6)
-	return nf_ct_frag6_gather(net, skb, user);
-#else
-	return 1;
-#endif
-}
-
 int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		    struct nf_bridge_frag_data *data,
 		    int (*output)(struct net *, struct sock *sk,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 809673222382..8d033a75a766 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -168,6 +168,7 @@ static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
 				     const struct nf_hook_state *state)
 {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
 	enum ip_conntrack_info ctinfo;
 	struct br_input_skb_cb cb;
@@ -180,14 +181,17 @@ static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
 
 	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
 
-	err = nf_ipv6_br_defrag(state->net, skb,
-				IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+	err = nf_ct_frag6_gather(state->net, skb,
+				 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
 	/* queued */
 	if (err == -EINPROGRESS)
 		return NF_STOLEN;
 
 	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
 	return err == 0 ? NF_ACCEPT : NF_DROP;
+#else
+	return NF_ACCEPT;
+#endif
 }
 
 static int nf_ct_br_ip_check(const struct sk_buff *skb)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 409e79b84a83..6d0e942d082d 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -245,9 +245,6 @@ static const struct nf_ipv6_ops ipv6ops = {
 	.route_input		= ip6_route_input,
 	.fragment		= ip6_fragment,
 	.reroute		= nf_ip6_reroute,
-#if IS_MODULE(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
-	.br_defrag		= nf_ct_frag6_gather,
-#endif
 #if IS_MODULE(CONFIG_IPV6)
 	.br_fragment		= br_ip6_fragment,
 #endif
-- 
cgit v1.2.3


From 59136aa3b2649796a3a1fd90158675f1f640ce0e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 11 Aug 2020 19:39:09 +0200
Subject: netfilter: nf_tables: free chain context when BINDING flag is missing

syzbot found a memory leak in nf_tables_addchain() because the chain
object is not free'd correctly on error.

Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Reported-by: syzbot+c99868fde67014f7e9f5@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d878e34e3354..fd814e514f94 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2018,8 +2018,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	if (nla[NFTA_CHAIN_NAME]) {
 		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
 	} else {
-		if (!(flags & NFT_CHAIN_BINDING))
-			return -EINVAL;
+		if (!(flags & NFT_CHAIN_BINDING)) {
+			err = -EINVAL;
+			goto err1;
+		}
 
 		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
 		chain->name = kstrdup(name, GFP_KERNEL);
-- 
cgit v1.2.3


From fd09af010788a884de1c39537c288830c3d305db Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 11 Aug 2020 15:04:37 -0700
Subject: bpf: sock_ops ctx access may stomp registers in corner case

I had a sockmap program that after doing some refactoring started spewing
this splat at me:

[18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
[...]
[18610.807359] Call Trace:
[18610.807370]  ? 0xffffffffc114d0d5
[18610.807382]  __cgroup_bpf_run_filter_sock_ops+0x7d/0xb0
[18610.807391]  tcp_connect+0x895/0xd50
[18610.807400]  tcp_v4_connect+0x465/0x4e0
[18610.807407]  __inet_stream_connect+0xd6/0x3a0
[18610.807412]  ? __inet_stream_connect+0x5/0x3a0
[18610.807417]  inet_stream_connect+0x3b/0x60
[18610.807425]  __sys_connect+0xed/0x120

After some debugging I was able to build this simple reproducer,

 __section("sockops/reproducer_bad")
 int bpf_reproducer_bad(struct bpf_sock_ops *skops)
 {
        volatile __maybe_unused __u32 i = skops->snd_ssthresh;
        return 0;
 }

And along the way noticed that below program ran without splat,

__section("sockops/reproducer_good")
int bpf_reproducer_good(struct bpf_sock_ops *skops)
{
        volatile __maybe_unused __u32 i = skops->snd_ssthresh;
        volatile __maybe_unused __u32 family;

        compiler_barrier();

        family = skops->family;
        return 0;
}

So I decided to check out the code we generate for the above two
programs and noticed each generates the BPF code you would expect,

0000000000000000 <bpf_reproducer_bad>:
;       volatile __maybe_unused __u32 i = skops->snd_ssthresh;
       0:       r1 = *(u32 *)(r1 + 96)
       1:       *(u32 *)(r10 - 4) = r1
;       return 0;
       2:       r0 = 0
       3:       exit

0000000000000000 <bpf_reproducer_good>:
;       volatile __maybe_unused __u32 i = skops->snd_ssthresh;
       0:       r2 = *(u32 *)(r1 + 96)
       1:       *(u32 *)(r10 - 4) = r2
;       family = skops->family;
       2:       r1 = *(u32 *)(r1 + 20)
       3:       *(u32 *)(r10 - 8) = r1
;       return 0;
       4:       r0 = 0
       5:       exit

So we get reasonable assembly, but still something was causing the null
pointer dereference. So, we load the programs and dump the xlated version
observing that line 0 above 'r* = *(u32 *)(r1 +96)' is going to be
translated by the skops access helpers.

int bpf_reproducer_bad(struct bpf_sock_ops * skops):
; volatile __maybe_unused __u32 i = skops->snd_ssthresh;
   0: (61) r1 = *(u32 *)(r1 +28)
   1: (15) if r1 == 0x0 goto pc+2
   2: (79) r1 = *(u64 *)(r1 +0)
   3: (61) r1 = *(u32 *)(r1 +2340)
; volatile __maybe_unused __u32 i = skops->snd_ssthresh;
   4: (63) *(u32 *)(r10 -4) = r1
; return 0;
   5: (b7) r0 = 0
   6: (95) exit

int bpf_reproducer_good(struct bpf_sock_ops * skops):
; volatile __maybe_unused __u32 i = skops->snd_ssthresh;
   0: (61) r2 = *(u32 *)(r1 +28)
   1: (15) if r2 == 0x0 goto pc+2
   2: (79) r2 = *(u64 *)(r1 +0)
   3: (61) r2 = *(u32 *)(r2 +2340)
; volatile __maybe_unused __u32 i = skops->snd_ssthresh;
   4: (63) *(u32 *)(r10 -4) = r2
; family = skops->family;
   5: (79) r1 = *(u64 *)(r1 +0)
   6: (69) r1 = *(u16 *)(r1 +16)
; family = skops->family;
   7: (63) *(u32 *)(r10 -8) = r1
; return 0;
   8: (b7) r0 = 0
   9: (95) exit

Then we look at lines 0 and 2 above. In the good case we do the zero
check in r2 and then load 'r1 + 0' at line 2. Do a quick cross-check
into the bpf_sock_ops check and we can confirm that is the 'struct
sock *sk' pointer field. But, in the bad case,

   0: (61) r1 = *(u32 *)(r1 +28)
   1: (15) if r1 == 0x0 goto pc+2
   2: (79) r1 = *(u64 *)(r1 +0)

Oh no, we read 'r1 +28' into r1, this is skops->fullsock and then in
line 2 we read the 'r1 +0' as a pointer. Now jumping back to our spat,

[18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001

The 0x01 makes sense because that is exactly the fullsock value. And
its not a valid dereference so we splat.

To fix we need to guard the case when a program is doing a sock_ops field
access with src_reg == dst_reg. This is already handled in the load case
where the ctx_access handler uses a tmp register being careful to
store the old value and restore it. To fix the get case test if
src_reg == dst_reg and in this case do the is_fullsock test in the
temporary register. Remembering to restore the temporary register before
writing to either dst_reg or src_reg to avoid smashing the pointer into
the struct holding the tmp variable.

Adding this inline code to test_tcpbpf_kern will now be generated
correctly from,

  9: r2 = *(u32 *)(r2 + 96)

to xlated code,

  12: (7b) *(u64 *)(r2 +32) = r9
  13: (61) r9 = *(u32 *)(r2 +28)
  14: (15) if r9 == 0x0 goto pc+4
  15: (79) r9 = *(u64 *)(r2 +32)
  16: (79) r2 = *(u64 *)(r2 +0)
  17: (61) r2 = *(u32 *)(r2 +2348)
  18: (05) goto pc+1
  19: (79) r9 = *(u64 *)(r2 +32)

And in the normal case we keep the original code, because really this
is an edge case. From this,

  9: r2 = *(u32 *)(r6 + 96)

to xlated code,

  22: (61) r2 = *(u32 *)(r6 +28)
  23: (15) if r2 == 0x0 goto pc+2
  24: (79) r2 = *(u64 *)(r6 +0)
  25: (61) r2 = *(u32 *)(r2 +2348)

So three additional instructions if dst == src register, but I scanned
my current code base and did not see this pattern anywhere so should
not be a big deal. Further, it seems no one else has hit this or at
least reported it so it must a fairly rare pattern.

Fixes: 9b1f3d6e5af29 ("bpf: Refactor sock_ops_convert_ctx_access")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/159718347772.4728.2781381670567919577.stgit@john-Precision-5820-Tower
---
 net/core/filter.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 7124f0fe6974..1baeeff2fcd2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8317,15 +8317,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 /* Helper macro for adding read access to tcp_sock or sock fields. */
 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
 	do {								      \
+		int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2;     \
 		BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >		      \
 			     sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == si->src_reg) {			      \
+			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,	      \
+					  offsetof(struct bpf_sock_ops_kern,  \
+					  temp));			      \
+			fullsock_reg = reg;				      \
+			jmp += 2;					      \
+		}							      \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
 						struct bpf_sock_ops_kern,     \
 						is_fullsock),		      \
-				      si->dst_reg, si->src_reg,		      \
+				      fullsock_reg, si->src_reg,	      \
 				      offsetof(struct bpf_sock_ops_kern,      \
 					       is_fullsock));		      \
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);	      \
+		if (si->dst_reg == si->src_reg)				      \
+			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+				      temp));				      \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
 						struct bpf_sock_ops_kern, sk),\
 				      si->dst_reg, si->src_reg,		      \
@@ -8334,6 +8350,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 						       OBJ_FIELD),	      \
 				      si->dst_reg, si->dst_reg,		      \
 				      offsetof(OBJ, OBJ_FIELD));	      \
+		if (si->dst_reg == si->src_reg)	{			      \
+			*insn++ = BPF_JMP_A(1);				      \
+			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+				      temp));				      \
+		}							      \
 	} while (0)
 
 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
-- 
cgit v1.2.3


From 84f44df664e9f0e261157e16ee1acd77cc1bb78d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 11 Aug 2020 15:04:56 -0700
Subject: bpf: sock_ops sk access may stomp registers when dst_reg = src_reg

Similar to patch ("bpf: sock_ops ctx access may stomp registers") if the
src_reg = dst_reg when reading the sk field of a sock_ops struct we
generate xlated code,

  53: (61) r9 = *(u32 *)(r9 +28)
  54: (15) if r9 == 0x0 goto pc+3
  56: (79) r9 = *(u64 *)(r9 +0)

This stomps on the r9 reg to do the sk_fullsock check and then when
reading the skops->sk field instead of the sk pointer we get the
sk_fullsock. To fix use similar pattern noted in the previous fix
and use the temp field to save/restore a register used to do
sk_fullsock check.

After the fix the generated xlated code reads,

  52: (7b) *(u64 *)(r9 +32) = r8
  53: (61) r8 = *(u32 *)(r9 +28)
  54: (15) if r9 == 0x0 goto pc+3
  55: (79) r8 = *(u64 *)(r9 +32)
  56: (79) r9 = *(u64 *)(r9 +0)
  57: (05) goto pc+1
  58: (79) r8 = *(u64 *)(r9 +32)

Here r9 register was in-use so r8 is chosen as the temporary register.
In line 52 r8 is saved in temp variable and at line 54 restored in case
fullsock != 0. Finally we handle fullsock == 0 case by restoring at
line 58.

This adds a new macro SOCK_OPS_GET_SK it is almost possible to merge
this with SOCK_OPS_GET_FIELD, but I found the extra branch logic a
bit more confusing than just adding a new macro despite a bit of
duplicating code.

Fixes: 1314ef561102e ("bpf: export bpf_sock for BPF_PROG_TYPE_SOCK_OPS prog type")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/159718349653.4728.6559437186853473612.stgit@john-Precision-5820-Tower
---
 net/core/filter.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 1baeeff2fcd2..b2df52086445 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8358,6 +8358,43 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 		}							      \
 	} while (0)
 
+#define SOCK_OPS_GET_SK()							      \
+	do {								      \
+		int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1;     \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == si->src_reg) {			      \
+			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,	      \
+					  offsetof(struct bpf_sock_ops_kern,  \
+					  temp));			      \
+			fullsock_reg = reg;				      \
+			jmp += 2;					      \
+		}							      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern,     \
+						is_fullsock),		      \
+				      fullsock_reg, si->src_reg,	      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       is_fullsock));		      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);	      \
+		if (si->dst_reg == si->src_reg)				      \
+			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+				      temp));				      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern, sk),\
+				      si->dst_reg, si->src_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		if (si->dst_reg == si->src_reg)	{			      \
+			*insn++ = BPF_JMP_A(1);				      \
+			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+				      temp));				      \
+		}							      \
+	} while (0)
+
 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
 		SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
 
@@ -8642,17 +8679,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
 		break;
 	case offsetof(struct bpf_sock_ops, sk):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct bpf_sock_ops_kern,
-						is_fullsock),
-				      si->dst_reg, si->src_reg,
-				      offsetof(struct bpf_sock_ops_kern,
-					       is_fullsock));
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct bpf_sock_ops_kern, sk),
-				      si->dst_reg, si->src_reg,
-				      offsetof(struct bpf_sock_ops_kern, sk));
+		SOCK_OPS_GET_SK();
 		break;
 	}
 	return insn - insn_buf;
-- 
cgit v1.2.3


From 5c04da55c754c44937b3d19c6522f9023fd5c5d5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Aug 2020 09:46:11 +0200
Subject: netfilter: ebtables: reject bogus getopt len value

syzkaller reports splat:
------------[ cut here ]------------
Buffer overflow detected (80 < 137)!
Call Trace:
 do_ebt_get_ctl+0x2b4/0x790 net/bridge/netfilter/ebtables.c:2317
 nf_getsockopt+0x72/0xd0 net/netfilter/nf_sockopt.c:116
 ip_getsockopt net/ipv4/ip_sockglue.c:1778 [inline]

caused by a copy-to-user with a too-large "*len" value.
This adds a argument check on *len just like in the non-compat version
of the handler.

Before the "Fixes" commit, the reproducer fails with -EINVAL as
expected:
1. core calls the "compat" getsockopt version
2. compat getsockopt version detects the *len value is possibly
   in 64-bit layout (*len != compat_len)
3. compat getsockopt version delegates everything to native getsockopt
   version
4. native getsockopt rejects invalid *len

-> compat handler only sees len == sizeof(compat_struct) for GET_ENTRIES.

After the refactor, event sequence is:
1. getsockopt calls "compat" version (len != native_len)
2. compat version attempts to copy *len bytes, where *len is random
   value from userspace

Fixes: fc66de8e16ec ("netfilter/ebtables: clean up compat {get, set}sockopt handling")
Reported-by: syzbot+5accb5c62faa1d346480@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1641f414d1ba..ebe33b60efd6 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2238,6 +2238,10 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
 	struct ebt_table *t;
 	struct net *net = sock_net(sk);
 
+	if ((cmd == EBT_SO_GET_INFO || cmd == EBT_SO_GET_INIT_INFO) &&
+	    *len != sizeof(struct compat_ebt_replace))
+		return -EINVAL;
+
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
-- 
cgit v1.2.3


From 38ba8b9241f5848a49b80fddac9ab5f4692e434e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 13 Aug 2020 09:18:34 -0700
Subject: can: j1939: fix kernel-infoleak in j1939_sk_sock2sockaddr_can()

syzbot found that at least 2 bytes of kernel information
were leaked during getsockname() on AF_CAN CAN_J1939 socket.

Since struct sockaddr_can has in fact two holes, simply
clear the whole area before filling it with useful data.

BUG: KMSAN: kernel-infoleak in kmsan_copy_to_user+0x81/0x90 mm/kmsan/kmsan_hooks.c:253
CPU: 0 PID: 8466 Comm: syz-executor511 Not tainted 5.8.0-rc5-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x21c/0x280 lib/dump_stack.c:118
 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121
 kmsan_internal_check_memory+0x238/0x3d0 mm/kmsan/kmsan.c:423
 kmsan_copy_to_user+0x81/0x90 mm/kmsan/kmsan_hooks.c:253
 instrument_copy_to_user include/linux/instrumented.h:91 [inline]
 _copy_to_user+0x18e/0x260 lib/usercopy.c:39
 copy_to_user include/linux/uaccess.h:186 [inline]
 move_addr_to_user+0x3de/0x670 net/socket.c:237
 __sys_getsockname+0x407/0x5e0 net/socket.c:1909
 __do_sys_getsockname net/socket.c:1920 [inline]
 __se_sys_getsockname+0x91/0xb0 net/socket.c:1917
 __x64_sys_getsockname+0x4a/0x70 net/socket.c:1917
 do_syscall_64+0xad/0x160 arch/x86/entry/common.c:386
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x440219
Code: Bad RIP value.
RSP: 002b:00007ffe5ee150c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000033
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440219
RDX: 0000000020000240 RSI: 0000000020000100 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401a20
R13: 0000000000401ab0 R14: 0000000000000000 R15: 0000000000000000

Local variable ----address@__sys_getsockname created at:
 __sys_getsockname+0x91/0x5e0 net/socket.c:1894
 __sys_getsockname+0x91/0x5e0 net/socket.c:1894

Bytes 2-3 of 24 are uninitialized
Memory access of size 24 starts at ffff8880ba2c7de8
Data copied to user address 0000000020000100

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Robin van der Gracht <robin@protonic.nl>
Cc: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: linux-can@vger.kernel.org
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200813161834.4021638-1-edumazet@google.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/socket.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 78ff9b3f1d40..b634b680177f 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -553,6 +553,11 @@ static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
 static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
 				       const struct j1939_sock *jsk, int peer)
 {
+	/* There are two holes (2 bytes and 3 bytes) to clear to avoid
+	 * leaking kernel information to user space.
+	 */
+	memset(addr, 0, J1939_MIN_NAMELEN);
+
 	addr->can_family = AF_CAN;
 	addr->can_ifindex = jsk->ifindex;
 	addr->can_addr.j1939.pgn = jsk->addr.pgn;
-- 
cgit v1.2.3


From b43e3a82bc432c1caaed8950e7662c143470c54c Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 7 Aug 2020 12:51:56 +0200
Subject: can: j1939: transport: j1939_simple_recv(): ignore local J1939
 messages send not by J1939 stack

In current J1939 stack implementation, we process all locally send
messages as own messages. Even if it was send by CAN_RAW socket.

To reproduce it use following commands:
testj1939 -P -r can0:0x80 &
cansend can0 18238040#0123

This step will trigger false positive not critical warning:
j1939_simple_recv: Received already invalidated message

With this patch we add additional check to make sure, related skb is own
echo message.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-2-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/socket.c    | 1 +
 net/can/j1939/transport.c | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index b634b680177f..ad973370de12 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -398,6 +398,7 @@ static int j1939_sk_init(struct sock *sk)
 	spin_lock_init(&jsk->sk_session_queue_lock);
 	INIT_LIST_HEAD(&jsk->sk_session_queue);
 	sk->sk_destruct = j1939_sk_sock_destruct;
+	sk->sk_protocol = CAN_J1939;
 
 	return 0;
 }
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 9f99af5b0b11..b135c5e2a86e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2017,6 +2017,10 @@ void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
 	if (!skb->sk)
 		return;
 
+	if (skb->sk->sk_family != AF_CAN ||
+	    skb->sk->sk_protocol != CAN_J1939)
+		return;
+
 	j1939_session_list_lock(priv);
 	session = j1939_session_get_simple(priv, skb);
 	j1939_session_list_unlock(priv);
-- 
cgit v1.2.3


From cd3b3636c99fcac52c598b64061f3fe4413c6a12 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 7 Aug 2020 12:51:57 +0200
Subject: can: j1939: transport: j1939_session_tx_dat(): fix use-after-free
 read in j1939_tp_txtimer()

The current stack implementation do not support ECTS requests of not
aligned TP sized blocks.

If ECTS will request a block with size and offset spanning two TP
blocks, this will cause memcpy() to read beyond the queued skb (which
does only contain one TP sized block).

Sometimes KASAN will detect this read if the memory region beyond the
skb was previously allocated and freed. In other situations it will stay
undetected. The ETP transfer in any case will be corrupted.

This patch adds a sanity check to avoid this kind of read and abort the
session with error J1939_XTP_ABORT_ECTS_TOO_BIG.

Reported-by: syzbot+5322482fe520b02aea30@syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable@vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-3-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index b135c5e2a86e..30957c9a8eb7 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -787,6 +787,18 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 		if (len > 7)
 			len = 7;
 
+		if (offset + len > se_skb->len) {
+			netdev_err_once(priv->ndev,
+					"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
+					__func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+			return -EOVERFLOW;
+		}
+
+		if (!len) {
+			ret = -ENOBUFS;
+			break;
+		}
+
 		memcpy(&dat[1], &tpdat[offset], len);
 		ret = j1939_tp_tx_dat(session, dat, len + 1);
 		if (ret < 0) {
@@ -1120,6 +1132,9 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
 		 * cleanup including propagation of the error to user space.
 		 */
 		break;
+	case -EOVERFLOW:
+		j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG);
+		break;
 	case 0:
 		session->tx_retry = 0;
 		break;
-- 
cgit v1.2.3


From af804b7826350d5af728dca4715e473338fbd7e5 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 7 Aug 2020 12:51:58 +0200
Subject: can: j1939: socket: j1939_sk_bind(): make sure ml_priv is allocated

This patch adds check to ensure that the struct net_device::ml_priv is
allocated, as it is used later by the j1939 stack.

The allocation is done by all mainline CAN network drivers, but when using
bond or team devices this is not the case.

Bail out if no ml_priv is allocated.

Reported-by: syzbot+f03d384f3455d28833eb@syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable@vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-4-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/socket.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index ad973370de12..b93876c57fc4 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -467,6 +467,14 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 			goto out_release_sock;
 		}
 
+		if (!ndev->ml_priv) {
+			netdev_warn_once(ndev,
+					 "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+			dev_put(ndev);
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
 		priv = j1939_netdev_start(ndev);
 		dev_put(ndev);
 		if (IS_ERR(priv)) {
-- 
cgit v1.2.3


From 840835c9281215341d84966a8855f267a971e6a3 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 7 Aug 2020 12:51:59 +0200
Subject: can: j1939: transport: add j1939_session_skb_find_by_offset()
 function

Sometimes it makes no sense to search the skb by pkt.dpo, since we need
next the skb within the transaction block. This may happen if we have an
ETP session with CTS set to less than 255 packets.

After this patch, we will be able to work with ETP sessions where the
block size (ETP.CM_CTS byte 2) is less than 255 packets.

Reported-by: Henrique Figueira <henrislip@gmail.com>
Reported-by: https://github.com/linux-can/can-utils/issues/228
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-5-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 30957c9a8eb7..90a2baac8a4a 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -352,17 +352,16 @@ void j1939_session_skb_queue(struct j1939_session *session,
 	skb_queue_tail(&session->skb_queue, skb);
 }
 
-static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+static struct
+sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
+					  unsigned int offset_start)
 {
 	struct j1939_priv *priv = session->priv;
+	struct j1939_sk_buff_cb *do_skcb;
 	struct sk_buff *skb = NULL;
 	struct sk_buff *do_skb;
-	struct j1939_sk_buff_cb *do_skcb;
-	unsigned int offset_start;
 	unsigned long flags;
 
-	offset_start = session->pkt.dpo * 7;
-
 	spin_lock_irqsave(&session->skb_queue.lock, flags);
 	skb_queue_walk(&session->skb_queue, do_skb) {
 		do_skcb = j1939_skb_to_cb(do_skb);
@@ -382,6 +381,14 @@ static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
 	return skb;
 }
 
+static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+{
+	unsigned int offset_start;
+
+	offset_start = session->pkt.dpo * 7;
+	return j1939_session_skb_find_by_offset(session, offset_start);
+}
+
 /* see if we are receiver
  * returns 0 for broadcasts, although we will receive them
  */
@@ -766,7 +773,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 	int ret = 0;
 	u8 dat[8];
 
-	se_skb = j1939_session_skb_find(session);
+	se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7);
 	if (!se_skb)
 		return -ENOBUFS;
 
@@ -1765,7 +1772,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 			    __func__, session);
 		goto out_session_cancel;
 	}
-	se_skb = j1939_session_skb_find(session);
+
+	se_skb = j1939_session_skb_find_by_offset(session, packet * 7);
 	if (!se_skb) {
 		netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
 			    session);
-- 
cgit v1.2.3


From e052d0540298bfe0f6cbbecdc7e2ea9b859575b2 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 7 Aug 2020 12:52:00 +0200
Subject: can: j1939: transport: j1939_xtp_rx_dat_one(): compare own packets to
 detect corruptions

Since the stack relays on receiving own packets, it was overwriting own
transmit buffer from received packets.

At least theoretically, the received echo buffer can be corrupt or
changed and the session partner can request to resend previous data. In
this case we will re-send bad data.

With this patch we will stop to overwrite own TX buffer and use it for
sanity checking.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-6-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 90a2baac8a4a..5cf107cb447c 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1792,7 +1792,20 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 	}
 
 	tpdat = se_skb->data;
-	memcpy(&tpdat[offset], &dat[1], nbytes);
+	if (!session->transmission) {
+		memcpy(&tpdat[offset], &dat[1], nbytes);
+	} else {
+		int err;
+
+		err = memcmp(&tpdat[offset], &dat[1], nbytes);
+		if (err)
+			netdev_err_once(priv->ndev,
+					"%s: 0x%p: Data of RX-looped back packet (%*ph) doesn't match TX data (%*ph)!\n",
+					__func__, session,
+					nbytes, &dat[1],
+					nbytes, &tpdat[offset]);
+	}
+
 	if (packet == session->pkt.rx)
 		session->pkt.rx++;
 
-- 
cgit v1.2.3


From 35759383133f64d90eba120a0d3efe8f71241650 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 14 Aug 2020 15:56:34 +0200
Subject: mptcp: sendmsg: reset iter on error

Once we've copied data from the iterator we need to revert in case we
end up not sending any data.

This bug doesn't trigger with normal 'poll' based tests, because
we only feed a small chunk of data to kernel after poll indicated
POLLOUT.  With blocking IO and large writes this triggers. Receiver
ends up with less data than it should get.

Fixes: 72511aab95c94d ("mptcp: avoid blocking in tcp_sendpages")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8c1d1a595701..c84b4051c2a4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -725,8 +725,10 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		if (!psize)
 			return -EINVAL;
 
-		if (!sk_wmem_schedule(sk, psize + dfrag->overhead))
+		if (!sk_wmem_schedule(sk, psize + dfrag->overhead)) {
+			iov_iter_revert(&msg->msg_iter, psize);
 			return -ENOMEM;
+		}
 	} else {
 		offset = dfrag->offset;
 		psize = min_t(size_t, dfrag->data_len, avail_size);
@@ -737,8 +739,10 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	 */
 	ret = do_tcp_sendpages(ssk, page, offset, psize,
 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT);
-	if (ret <= 0)
+	if (ret <= 0) {
+		iov_iter_revert(&msg->msg_iter, psize);
 		return ret;
+	}
 
 	frag_truesize += ret;
 	if (!retransmission) {
-- 
cgit v1.2.3


From f4fd77fd87e9b214c26bb2ebd4f90055eaea5ade Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Wed, 5 Aug 2020 11:50:22 +0800
Subject: can: j1939: fix support for multipacket broadcast message

Currently j1939_tp_im_involved_anydir() in j1939_tp_recv() check the previously
set flags J1939_ECU_LOCAL_DST and J1939_ECU_LOCAL_SRC of incoming skb, thus
multipacket broadcast message was aborted by receive side because it may come
from remote ECUs and have no exact dst address. Similarly, j1939_tp_cmd_recv()
and j1939_xtp_rx_dat() didn't process broadcast message.

So fix it by checking and process broadcast message in j1939_tp_recv(),
j1939_tp_cmd_recv() and j1939_xtp_rx_dat().

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1596599425-5534-2-git-send-email-zhangchangzhong@huawei.com
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5cf107cb447c..868ecb2bfa45 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1673,8 +1673,12 @@ static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
 			return;
 		}
 		session = j1939_xtp_rx_rts_session_new(priv, skb);
-		if (!session)
+		if (!session) {
+			if (cmd == J1939_TP_CMD_BAM && j1939_sk_recv_match(priv, skcb))
+				netdev_info(priv->ndev, "%s: failed to create TP BAM session\n",
+					    __func__);
 			return;
+		}
 	} else {
 		if (j1939_xtp_rx_rts_session_active(session, skb)) {
 			j1939_session_put(session);
@@ -1865,6 +1869,13 @@ static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
 		else
 			j1939_xtp_rx_dat_one(session, skb);
 	}
+
+	if (j1939_cb_is_broadcast(skcb)) {
+		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+						    false);
+		if (session)
+			j1939_xtp_rx_dat_one(session, skb);
+	}
 }
 
 /* j1939 main intf */
@@ -1956,7 +1967,7 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
 		if (j1939_tp_im_transmitter(skcb))
 			j1939_xtp_rx_rts(priv, skb, true);
 
-		if (j1939_tp_im_receiver(skcb))
+		if (j1939_tp_im_receiver(skcb) || j1939_cb_is_broadcast(skcb))
 			j1939_xtp_rx_rts(priv, skb, false);
 
 		break;
@@ -2020,7 +2031,7 @@ int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
 {
 	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
 
-	if (!j1939_tp_im_involved_anydir(skcb))
+	if (!j1939_tp_im_involved_anydir(skcb) && !j1939_cb_is_broadcast(skcb))
 		return 0;
 
 	switch (skcb->addr.pgn) {
-- 
cgit v1.2.3


From e8b17653088f28a87c81845fa41a2d295a3b458c Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Wed, 5 Aug 2020 11:50:23 +0800
Subject: can: j1939: cancel rxtimer on multipacket broadcast session complete

If j1939_xtp_rx_dat_one() receive last frame of multipacket broadcast message,
j1939_session_timers_cancel() should be called to cancel rxtimer.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1596599425-5534-3-git-send-email-zhangchangzhong@huawei.com
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 868ecb2bfa45..2f3c3afd5071 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1824,6 +1824,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 	}
 
 	if (final) {
+		j1939_session_timers_cancel(session);
 		j1939_session_completed(session);
 	} else if (do_cts_eoma) {
 		j1939_tp_set_rxtimeout(session, 1250);
-- 
cgit v1.2.3


From 2b8b2e31555cf55ba3680fb28e2b382e168d7ea1 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Wed, 5 Aug 2020 11:50:24 +0800
Subject: can: j1939: abort multipacket broadcast session when timeout occurs

If timeout occurs, j1939_tp_rxtimer() first calls hrtimer_start() to restart
rxtimer, and then calls __j1939_session_cancel() to set session->state =
J1939_SESSION_WAITING_ABORT. At next timeout expiration, because of the
J1939_SESSION_WAITING_ABORT session state j1939_tp_rxtimer() will call
j1939_session_deactivate_activate_next() to deactivate current session, and
rxtimer won't be set.

But for multipacket broadcast session, __j1939_session_cancel() don't set
session->state = J1939_SESSION_WAITING_ABORT, thus current session won't be
deactivate and hrtimer_start() is called to start new rxtimer again and again.

So fix it by moving session->state = J1939_SESSION_WAITING_ABORT out of if
(!j1939_cb_is_broadcast(&session->skcb)) statement.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1596599425-5534-4-git-send-email-zhangchangzhong@huawei.com
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 2f3c3afd5071..047118d5270b 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1074,9 +1074,9 @@ static void __j1939_session_cancel(struct j1939_session *session,
 	lockdep_assert_held(&session->priv->active_session_list_lock);
 
 	session->err = j1939_xtp_abort_to_errno(priv, err);
+	session->state = J1939_SESSION_WAITING_ABORT;
 	/* do not send aborts on incoming broadcasts */
 	if (!j1939_cb_is_broadcast(&session->skcb)) {
-		session->state = J1939_SESSION_WAITING_ABORT;
 		j1939_xtp_tx_abort(priv, &session->skcb,
 				   !session->transmission,
 				   err, session->skcb.addr.pgn);
-- 
cgit v1.2.3


From 0ae18a82686f9b9965a8ce0dd81371871b306ffe Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Wed, 5 Aug 2020 11:50:25 +0800
Subject: can: j1939: add rxtimer for multipacket broadcast session

According to SAE J1939/21 (Chapter 5.12.3 and APPENDIX C), for transmit side
the required time interval between packets of a multipacket broadcast message
is 50 to 200 ms, the responder shall use a timeout of 250ms (provides margin
allowing for the maximumm spacing of 200ms). For receive side a timeout will
occur when a time of greater than 750 ms elapsed between two message packets
when more packets were expected.

So this patch fix and add rxtimer for multipacket broadcast session.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1596599425-5534-5-git-send-email-zhangchangzhong@huawei.com
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 047118d5270b..a8dd956b5e8e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -723,10 +723,12 @@ static int j1939_session_tx_rts(struct j1939_session *session)
 		return ret;
 
 	session->last_txcmd = dat[0];
-	if (dat[0] == J1939_TP_CMD_BAM)
+	if (dat[0] == J1939_TP_CMD_BAM) {
 		j1939_tp_schedule_txtimer(session, 50);
-
-	j1939_tp_set_rxtimeout(session, 1250);
+		j1939_tp_set_rxtimeout(session, 250);
+	} else {
+		j1939_tp_set_rxtimeout(session, 1250);
+	}
 
 	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
 
@@ -1687,11 +1689,15 @@ static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
 	}
 	session->last_cmd = cmd;
 
-	j1939_tp_set_rxtimeout(session, 1250);
-
-	if (cmd != J1939_TP_CMD_BAM && !session->transmission) {
-		j1939_session_txtimer_cancel(session);
-		j1939_tp_schedule_txtimer(session, 0);
+	if (cmd == J1939_TP_CMD_BAM) {
+		if (!session->transmission)
+			j1939_tp_set_rxtimeout(session, 750);
+	} else {
+		if (!session->transmission) {
+			j1939_session_txtimer_cancel(session);
+			j1939_tp_schedule_txtimer(session, 0);
+		}
+		j1939_tp_set_rxtimeout(session, 1250);
 	}
 
 	j1939_session_put(session);
@@ -1742,6 +1748,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 	int offset;
 	int nbytes;
 	bool final = false;
+	bool remain = false;
 	bool do_cts_eoma = false;
 	int packet;
 
@@ -1817,6 +1824,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 	    j1939_cb_is_broadcast(&session->skcb)) {
 		if (session->pkt.rx >= session->pkt.total)
 			final = true;
+		else
+			remain = true;
 	} else {
 		/* never final, an EOMA must follow */
 		if (session->pkt.rx >= session->pkt.last)
@@ -1826,6 +1835,9 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 	if (final) {
 		j1939_session_timers_cancel(session);
 		j1939_session_completed(session);
+	} else if (remain) {
+		if (!session->transmission)
+			j1939_tp_set_rxtimeout(session, 750);
 	} else if (do_cts_eoma) {
 		j1939_tp_set_rxtimeout(session, 1250);
 		if (!session->transmission)
-- 
cgit v1.2.3


From f8414a8d886b613b90d9fdf7cda6feea313b1069 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 15 Aug 2020 09:29:30 +0200
Subject: net: xdp: pull ethernet header off packet after computing
 skb->protocol

When an XDP program changes the ethernet header protocol field,
eth_type_trans is used to recalculate skb->protocol. In order for
eth_type_trans to work correctly, the ethernet header must actually be
part of the skb data segment, so the code first pushes that onto the
head of the skb. However, it subsequently forgets to pull it back off,
making the behavior of the passed-on packet inconsistent between the
protocol modifying case and the static protocol case. This patch fixes
the issue by simply pulling the ethernet header back off of the skb
head.

Fixes: 297249569932 ("net: fix generic XDP to handle if eth header was mangled")
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index b5d1129d8310..7592d7dd6109 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4676,6 +4676,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
 		__skb_push(skb, ETH_HLEN);
 		skb->protocol = eth_type_trans(skb, skb->dev);
+		__skb_pull(skb, ETH_HLEN);
 	}
 
 	switch (act) {
-- 
cgit v1.2.3


From 55eff0eb7460c3d50716ed9eccf22257b046ca92 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Sat, 15 Aug 2020 04:44:31 -0400
Subject: net: Fix potential wrong skb->protocol in skb_vlan_untag()

We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). So
we should pull VLAN_HLEN + sizeof(unsigned short) in skb_vlan_untag() or
we may access the wrong data.

Fixes: 0d5501c1c828 ("net: Always untag vlan-tagged traffic on input.")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7e2e502ef519..5c3b906aeef3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5418,8 +5418,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(!skb))
 		goto err_free;
-
-	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+	/* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
 		goto err_free;
 
 	vhdr = (struct vlan_hdr *)skb->data;
-- 
cgit v1.2.3


From 47733f9daf4fe4f7e0eb9e273f21ad3a19130487 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sat, 15 Aug 2020 16:29:15 -0700
Subject: tipc: fix uninit skb->data in tipc_nl_compat_dumpit()

__tipc_nl_compat_dumpit() has two callers, and it expects them to
pass a valid nlmsghdr via arg->data. This header is artificial and
crafted just for __tipc_nl_compat_dumpit().

tipc_nl_compat_publ_dump() does so by putting a genlmsghdr as well
as some nested attribute, TIPC_NLA_SOCK. But the other caller
tipc_nl_compat_dumpit() does not, this leaves arg->data uninitialized
on this call path.

Fix this by just adding a similar nlmsghdr without any payload in
tipc_nl_compat_dumpit().

This bug exists since day 1, but the recent commit 6ea67769ff33
("net: tipc: prepare attrs in __tipc_nl_compat_dumpit()") makes it
easier to appear.

Reported-and-tested-by: syzbot+0e7181deafa7e0b79923@syzkaller.appspotmail.com
Fixes: d0796d1ef63d ("tipc: convert legacy nl bearer dump to nl compat")
Cc: Jon Maloy <jmaloy@redhat.com>
Cc: Ying Xue <ying.xue@windriver.com>
Cc: Richard Alpe <richard.alpe@ericsson.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/netlink_compat.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 217516357ef2..90e3c70a91ad 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -275,8 +275,9 @@ err_out:
 static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 				 struct tipc_nl_compat_msg *msg)
 {
-	int err;
+	struct nlmsghdr *nlh;
 	struct sk_buff *arg;
+	int err;
 
 	if (msg->req_type && (!msg->req_size ||
 			      !TLV_CHECK_TYPE(msg->req, msg->req_type)))
@@ -305,6 +306,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 		return -ENOMEM;
 	}
 
+	nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI);
+	if (!nlh) {
+		kfree_skb(arg);
+		kfree_skb(msg->rep);
+		msg->rep = NULL;
+		return -EMSGSIZE;
+	}
+	nlmsg_end(arg, nlh);
+
 	err = __tipc_nl_compat_dumpit(cmd, msg, arg);
 	if (err) {
 		kfree_skb(msg->rep);
-- 
cgit v1.2.3


From c530189905efe91b6a464db4ec1b56b4c069609f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 16 Aug 2020 17:32:03 +0800
Subject: tipc: not enable tipc when ipv6 works as a module

When using ipv6_dev_find() in one module, it requires ipv6 not to
work as a module. Otherwise, this error occurs in build:

  undefined reference to `ipv6_dev_find'.

So fix it by adding "depends on IPV6 || IPV6=n" to tipc/Kconfig,
as it does in sctp/Kconfig.

Fixes: 5a6f6f579178 ("tipc: set ub->ifindex for local ipv6 address")
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 9dd780215eef..be1c4003d67d 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,6 +6,7 @@
 menuconfig TIPC
 	tristate "The TIPC Protocol"
 	depends on INET
+	depends on IPV6 || IPV6=n
 	help
 	  The Transparent Inter Process Communication (TIPC) protocol is
 	  specially designed for intra cluster communication. This protocol
-- 
cgit v1.2.3


From bd71ea60673180eccf17b1a1dda3504a04783789 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 16 Aug 2020 21:26:38 +0200
Subject: net: devlink: Remove overzealous WARN_ON with snapshots

It is possible to trigger this WARN_ON from user space by triggering a
devlink snapshot with an ID which already exists. We don't need both
-EEXISTS being reported and spamming the kernel log.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/devlink.c b/net/core/devlink.c
index e674f0f46dc2..e5feb87beca7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4063,7 +4063,7 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
 {
 	lockdep_assert_held(&devlink->lock);
 
-	if (WARN_ON(xa_load(&devlink->snapshot_ids, id)))
+	if (xa_load(&devlink->snapshot_ids, id))
 		return -EEXIST;
 
 	return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
-- 
cgit v1.2.3


From b3b2854dcf704c1db05d897072f98e8b79398af1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 16 Aug 2020 23:14:20 +0200
Subject: mptcp: sendmsg: reset iter on error redux

This fix wasn't correct: When this function is invoked from the
retransmission worker, the iterator contains garbage and resetting
it causes a crash.

As the work queue should not be performance critical also zero the
msghdr struct.

Fixes: 35759383133f64d "(mptcp: sendmsg: reset iter on error)"
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c84b4051c2a4..1aad411a0e46 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -740,7 +740,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	ret = do_tcp_sendpages(ssk, page, offset, psize,
 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT);
 	if (ret <= 0) {
-		iov_iter_revert(&msg->msg_iter, psize);
+		if (!retransmission)
+			iov_iter_revert(&msg->msg_iter, psize);
 		return ret;
 	}
 
@@ -1392,7 +1393,9 @@ static void mptcp_worker(struct work_struct *work)
 	struct mptcp_data_frag *dfrag;
 	u64 orig_write_seq;
 	size_t copied = 0;
-	struct msghdr msg;
+	struct msghdr msg = {
+		.msg_flags = MSG_DONTWAIT,
+	};
 	long timeo = 0;
 
 	lock_sock(sk);
@@ -1425,7 +1428,6 @@ static void mptcp_worker(struct work_struct *work)
 
 	lock_sock(ssk);
 
-	msg.msg_flags = MSG_DONTWAIT;
 	orig_len = dfrag->data_len;
 	orig_offset = dfrag->offset;
 	orig_write_seq = dfrag->data_seq;
-- 
cgit v1.2.3


From 7f9bf6e82461b97ce43a912cb4a959c5a41367ac Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 17 Aug 2020 11:48:05 -0700
Subject: Revert "net: xdp: pull ethernet header off packet after computing
 skb->protocol"

This reverts commit f8414a8d886b613b90d9fdf7cda6feea313b1069.

eth_type_trans() does the necessary pull on the skb.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7592d7dd6109..b5d1129d8310 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4676,7 +4676,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
 		__skb_push(skb, ETH_HLEN);
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		__skb_pull(skb, ETH_HLEN);
 	}
 
 	switch (act) {
-- 
cgit v1.2.3


From 8dfddfb79653df7c38a9c8c4c034f242a36acee9 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran <necip@google.com>
Date: Mon, 17 Aug 2020 15:54:48 +0000
Subject: net: qrtr: fix usage of idr in port assignment to socket

Passing large uint32 sockaddr_qrtr.port numbers for port allocation
triggers a warning within idr_alloc() since the port number is cast
to int, and thus interpreted as a negative number. This leads to
the rejection of such valid port numbers in qrtr_port_assign() as
idr_alloc() fails.

To avoid the problem, switch to idr_alloc_u32() instead.

Fixes: bdabad3e363d ("net: Add Qualcomm IPC router")
Reported-by: syzbot+f31428628ef672716ea8@syzkaller.appspotmail.com
Signed-off-by: Necip Fazil Yildiran <necip@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index b4c0db0b7d31..90c558f89d46 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -692,23 +692,25 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
  */
 static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
 {
+	u32 min_port;
 	int rc;
 
 	mutex_lock(&qrtr_port_lock);
 	if (!*port) {
-		rc = idr_alloc(&qrtr_ports, ipc,
-			       QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1,
-			       GFP_ATOMIC);
-		if (rc >= 0)
-			*port = rc;
+		min_port = QRTR_MIN_EPH_SOCKET;
+		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC);
+		if (!rc)
+			*port = min_port;
 	} else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
 		rc = -EACCES;
 	} else if (*port == QRTR_PORT_CTRL) {
-		rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC);
+		min_port = 0;
+		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC);
 	} else {
-		rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC);
-		if (rc >= 0)
-			*port = rc;
+		min_port = *port;
+		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC);
+		if (!rc)
+			*port = min_port;
 	}
 	mutex_unlock(&qrtr_port_lock);
 
-- 
cgit v1.2.3