From dc4501ff287547dea7ca10f1c580c741291a8760 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 23 Dec 2018 21:45:56 -0800 Subject: tipc: fix a double free in tipc_enable_bearer() bearer_disable() already calls kfree_rcu() to free struct tipc_bearer, we don't need to call kfree() again. Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()") Reported-by: syzbot+b981acf1fb240c0c128b@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/bearer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index fb2c0d8f359f..d27f30a9a01d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -319,7 +319,6 @@ static int tipc_enable_bearer(struct net *net, const char *name, res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); - kfree(b); errstr = "failed to create discoverer"; goto rejected; } -- cgit v1.2.3 From f0fb9b288d0a7e9cc324ae362e2dfd2cc2217ded Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 24 Dec 2018 10:30:17 -0600 Subject: ipv6/route: Add a missing check on proc_dointvec While flushing the cache via ipv6_sysctl_rtcache_flush(), the call to proc_dointvec() may fail. The fix adds a check that returns the error, on failure. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 194bc162866d..a94e0b02a8ac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5054,12 +5054,16 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, { struct net *net; int delay; + int ret; if (!write) return -EINVAL; net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; - proc_dointvec(ctl, write, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (ret) + return ret; + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit v1.2.3 From 46273cf7e009231d2b6bc10a926e82b8928a9fb2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Wed, 26 Dec 2018 00:09:04 -0600 Subject: tipc: fix a missing check of genlmsg_put genlmsg_put could fail. The fix inserts a check of its return value, and if it fails, returns -EMSGSIZE. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 21f6ccc89401..40f5cae623a7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -904,6 +904,8 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + return -EMSGSIZE; nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { -- cgit v1.2.3 From eb8950861c1bfd3eecc8f6faad213e3bca0dc395 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 21 Dec 2018 00:46:23 -0600 Subject: netfilter: nf_tables: fix a missing check of nla_put_failure If nla_nest_start() may fail. The fix checks its return value and goes to nla_put_failure if it fails. Signed-off-by: Kangjie Lu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fec814dace5a..2b0a93300dd7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + if (!nest) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) goto nla_put_failure; -- cgit v1.2.3 From c78e7818f16f687389174c4569243abbec8dc68f Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Fri, 28 Dec 2018 01:24:42 +0100 Subject: netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Most of the time these were the same value anyway, but when CONFIG_LOCKDEP was enabled we would use a smaller number of locks to reduce overhead. Unfortunately having two values is confusing and not worth the complexity. This fixes a bug where tree_gc_worker() would only GC up to CONNCOUNT_LOCK_SLOTS trees which meant when CONFIG_LOCKDEP was enabled not all trees would be GCed by tree_gc_worker(). Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Signed-off-by: Florian Westphal Signed-off-by: Shawn Bohrer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 9cd180bda092..3271a4e00500 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -33,12 +33,6 @@ #define CONNCOUNT_SLOTS 256U -#ifdef CONFIG_LOCKDEP -#define CONNCOUNT_LOCK_SLOTS 8U -#else -#define CONNCOUNT_LOCK_SLOTS 256U -#endif - #define CONNCOUNT_GC_MAX_NODES 8 #define MAX_KEYLEN 5 @@ -60,7 +54,7 @@ struct nf_conncount_rb { struct rcu_head rcu_head; }; -static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; +static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp; struct nf_conncount_data { unsigned int keylen; @@ -353,7 +347,7 @@ insert_tree(struct net *net, unsigned int count = 0, gc_count = 0; bool node_found = false; - spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + spin_lock_bh(&nf_conncount_locks[hash]); parent = NULL; rbnode = &(root->rb_node); @@ -430,7 +424,7 @@ insert_tree(struct net *net, rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); out_unlock: - spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + spin_unlock_bh(&nf_conncount_locks[hash]); return count; } @@ -499,7 +493,7 @@ static void tree_gc_worker(struct work_struct *work) struct rb_node *node; unsigned int tree, next_tree, gc_count = 0; - tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; + tree = data->gc_tree % CONNCOUNT_SLOTS; root = &data->root[tree]; rcu_read_lock(); @@ -621,10 +615,7 @@ static int __init nf_conncount_modinit(void) { int i; - BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); - BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); - - for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) + for (i = 0; i < CONNCOUNT_SLOTS; ++i) spin_lock_init(&nf_conncount_locks[i]); conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", -- cgit v1.2.3 From 4cd273bb91b3001f623f516ec726c49754571b1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Dec 2018 01:24:43 +0100 Subject: netfilter: nf_conncount: don't skip eviction when age is negative age is signed integer, so result can be negative when the timestamps have a large delta. In this case we want to discard the entry. Instead of using age >= 2 || age < 0, just make it unsigned. Fixes: b36e4523d4d56 ("netfilter: nf_conncount: fix garbage collection confirm race") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 3271a4e00500..8bb4ed85c262 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -155,7 +155,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, const struct nf_conntrack_tuple_hash *found; unsigned long a, b; int cpu = raw_smp_processor_id(); - __s32 age; + u32 age; found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found) -- cgit v1.2.3 From f7fcc98dfc2d136722007fec0debbed761679b94 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Dec 2018 01:24:44 +0100 Subject: netfilter: nf_conncount: split gc in two phases The lockless workqueue garbage collector can race with packet path garbage collector to delete list nodes, as it calls tree_nodes_free() with the addresses of nodes that might have been free'd already from another cpu. To fix this, split gc into two phases. One phase to perform gc on the connections: From a locking perspective, this is the same as count_tree(): we hold rcu lock, but we do not change the tree, we only change the nodes' contents. The second phase acquires the tree lock and reaps empty nodes. This avoids a race condition of the garbage collection vs. packet path: If a node has been free'd already, the second phase won't find it anymore. This second phase is, from locking perspective, same as insert_tree(). The former only modifies nodes (list content, count), latter modifies the tree itself (rb_erase or rb_insert). Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8bb4ed85c262..753132e4afa8 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -500,16 +500,32 @@ static void tree_gc_worker(struct work_struct *work) for (node = rb_first(root); node != NULL; node = rb_next(node)) { rbconn = rb_entry(node, struct nf_conncount_rb, node); if (nf_conncount_gc_list(data->net, &rbconn->list)) - gc_nodes[gc_count++] = rbconn; + gc_count++; } rcu_read_unlock(); spin_lock_bh(&nf_conncount_locks[tree]); + if (gc_count < ARRAY_SIZE(gc_nodes)) + goto next; /* do not bother */ - if (gc_count) { - tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + node = rb_first(root); + while (node != NULL) { + rbconn = rb_entry(node, struct nf_conncount_rb, node); + node = rb_next(node); + + if (rbconn->list.count > 0) + continue; + + gc_nodes[gc_count++] = rbconn; + if (gc_count >= ARRAY_SIZE(gc_nodes)) { + tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + } } + tree_nodes_free(root, gc_nodes, gc_count); +next: clear_bit(tree, data->pending_trees); next_tree = (tree + 1) % CONNCOUNT_SLOTS; -- cgit v1.2.3 From e8cfb372b38a1b8979aa7f7631fb5e7b11c3793c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Dec 2018 01:24:45 +0100 Subject: netfilter: nf_conncount: restart search when nodes have been erased Shawn Bohrer reported a following crash: |RIP: 0010:rb_erase+0xae/0x360 [..] Call Trace: nf_conncount_destroy+0x59/0xc0 [nf_conncount] cleanup_match+0x45/0x70 [ip_tables] ... Shawn tracked this down to bogus 'parent' pointer: Problem is that when we insert a new node, then there is a chance that the 'parent' that we found was also passed to tree_nodes_free() (because that node was empty) for erase+free. Instead of trying to be clever and detect when this happens, restart the search if we have evicted one or more nodes. To prevent frequent restarts, do not perform gc on the second round. Also, unconditionally schedule the gc worker. The condition gc_count > ARRAY_SIZE(gc_nodes)) cannot be true unless tree grows very large, as the height of the tree will be low even with hundreds of nodes present. Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Reported-by: Shawn Bohrer Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 753132e4afa8..0a83c694a8f1 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -346,9 +346,10 @@ insert_tree(struct net *net, struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; bool node_found = false; + bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); - +restart: parent = NULL; rbnode = &(root->rb_node); while (*rbnode) { @@ -381,21 +382,16 @@ insert_tree(struct net *net, if (gc_count >= ARRAY_SIZE(gc_nodes)) continue; - if (nf_conncount_gc_list(net, &rbconn->list)) + if (do_gc && nf_conncount_gc_list(net, &rbconn->list)) gc_nodes[gc_count++] = rbconn; } if (gc_count) { tree_nodes_free(root, gc_nodes, gc_count); - /* tree_node_free before new allocation permits - * allocator to re-use newly free'd object. - * - * This is a rare event; in most cases we will find - * existing node to re-use. (or gc_count is 0). - */ - - if (gc_count >= ARRAY_SIZE(gc_nodes)) - schedule_gc_worker(data, hash); + schedule_gc_worker(data, hash); + gc_count = 0; + do_gc = false; + goto restart; } if (node_found) -- cgit v1.2.3 From df4a902509766897f7371fdfa4c3bf8bc321b55d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Dec 2018 01:24:46 +0100 Subject: netfilter: nf_conncount: merge lookup and add functions 'lookup' is always followed by 'add'. Merge both and make the list-walk part of nf_conncount_add(). This also avoids one unneeded unlock/re-lock pair. Extra care needs to be taken in count_tree, as we only hold rcu read lock, i.e. we can only insert to an existing tree node after acquiring its lock and making sure it has a nonzero count. As a zero count should be rare, just fall back to insert_tree() (which acquires tree lock). This issue and its solution were pointed out by Shawn Bohrer during patch review. Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 18 +--- net/netfilter/nf_conncount.c | 146 +++++++++++++---------------- net/netfilter/nft_connlimit.c | 14 +-- 3 files changed, 72 insertions(+), 106 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 4b2b2baf8ab4..aa66775c15f4 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -5,12 +5,6 @@ struct nf_conncount_data; -enum nf_conncount_list_add { - NF_CONNCOUNT_ADDED, /* list add was ok */ - NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ - NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ -}; - struct nf_conncount_list { spinlock_t list_lock; struct list_head head; /* connections with the same filtering key */ @@ -29,18 +23,12 @@ unsigned int nf_conncount_count(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); -void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit); +int nf_conncount_add(struct net *net, struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_list_init(struct nf_conncount_list *list); -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); - bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list); diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 0a83c694a8f1..ce7f7d1212a6 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -83,38 +83,6 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone) -{ - struct nf_conncount_tuple *conn; - - if (WARN_ON_ONCE(list->count > INT_MAX)) - return NF_CONNCOUNT_ERR; - - conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return NF_CONNCOUNT_ERR; - - conn->tuple = *tuple; - conn->zone = *zone; - conn->cpu = raw_smp_processor_id(); - conn->jiffies32 = (u32)jiffies; - conn->dead = false; - spin_lock_bh(&list->list_lock); - if (list->dead == true) { - kmem_cache_free(conncount_conn_cachep, conn); - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_SKIP; - } - list_add_tail(&conn->node, &list->head); - list->count++; - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_ADDED; -} -EXPORT_SYMBOL_GPL(nf_conncount_add); - static void __conn_free(struct rcu_head *h) { struct nf_conncount_tuple *conn; @@ -177,11 +145,10 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, return ERR_PTR(-EAGAIN); } -void nf_conncount_lookup(struct net *net, - struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) +static int __nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn, *conn_n; @@ -189,9 +156,6 @@ void nf_conncount_lookup(struct net *net, unsigned int collect = 0; bool free_entry = false; - /* best effort only */ - *addit = tuple ? true : false; - /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) @@ -201,21 +165,19 @@ void nf_conncount_lookup(struct net *net, if (IS_ERR(found)) { /* Not found, but might be about to be confirmed */ if (PTR_ERR(found) == -EAGAIN) { - if (!tuple) - continue; - if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_id(&conn->zone, conn->zone.dir) == nf_ct_zone_id(zone, zone->dir)) - *addit = false; - } else if (PTR_ERR(found) == -ENOENT) + return 0; /* already exists */ + } else { collect++; + } continue; } found_ct = nf_ct_tuplehash_to_ctrack(found); - if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && + if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_equal(found_ct, zone, zone->dir)) { /* * We should not see tuples twice unless someone hooks @@ -223,7 +185,8 @@ void nf_conncount_lookup(struct net *net, * * Attempt to avoid a re-add in this case. */ - *addit = false; + nf_ct_put(found_ct); + return 0; } else if (already_closed(found_ct)) { /* * we do not care about connections which are @@ -237,8 +200,38 @@ void nf_conncount_lookup(struct net *net, nf_ct_put(found_ct); } + + if (WARN_ON_ONCE(list->count > INT_MAX)) + return -EOVERFLOW; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return -ENOMEM; + + conn->tuple = *tuple; + conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; + list_add_tail(&conn->node, &list->head); + list->count++; + return 0; } -EXPORT_SYMBOL_GPL(nf_conncount_lookup); + +int nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + int ret; + + /* check the saved connections */ + spin_lock_bh(&list->list_lock); + ret = __nf_conncount_add(net, list, tuple, zone); + spin_unlock_bh(&list->list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_conncount_add); void nf_conncount_list_init(struct nf_conncount_list *list) { @@ -339,13 +332,11 @@ insert_tree(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - bool node_found = false; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -363,20 +354,15 @@ restart: } else if (diff > 0) { rbnode = &((*rbnode)->rb_right); } else { - /* unlikely: other cpu added node already */ - node_found = true; - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { + int ret; + + ret = nf_conncount_add(net, &rbconn->list, tuple, zone); + if (ret) count = 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { + else count = rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ - node_found = false; - } - break; + tree_nodes_free(root, gc_nodes, gc_count); + goto out_unlock; } if (gc_count >= ARRAY_SIZE(gc_nodes)) @@ -394,9 +380,6 @@ restart: goto restart; } - if (node_found) - goto out_unlock; - /* expected case: match, insert new node */ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); if (rbconn == NULL) @@ -431,7 +414,6 @@ count_tree(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct rb_root *root; struct rb_node *parent; struct nf_conncount_rb *rbconn; @@ -444,7 +426,6 @@ count_tree(struct net *net, parent = rcu_dereference_raw(root->rb_node); while (parent) { int diff; - bool addit; rbconn = rb_entry(parent, struct nf_conncount_rb, node); @@ -454,24 +435,29 @@ count_tree(struct net *net, } else if (diff > 0) { parent = rcu_dereference_raw(parent->rb_right); } else { - /* same source network -> be counted! */ - nf_conncount_lookup(net, &rbconn->list, tuple, zone, - &addit); + int ret; - if (!addit) + if (!tuple) { + nf_conncount_gc_list(net, &rbconn->list); return rbconn->list.count; + } - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { - return 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { - return rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ + spin_lock_bh(&rbconn->list.list_lock); + /* Node might be about to be free'd. + * We need to defer to insert_tree() in this case. + */ + if (rbconn->list.count == 0) { + spin_unlock_bh(&rbconn->list.list_lock); break; } + + /* same source network -> be counted! */ + ret = __nf_conncount_add(net, &rbconn->list, tuple, zone); + spin_unlock_bh(&rbconn->list.list_lock); + if (ret) + return 0; /* hotdrop */ + else + return rbconn->list.count; } } diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index b90d96ba4a12..af1497ab9464 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int count; - bool addit; tuple_ptr = &tuple; @@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, - &addit); - count = priv->list.count; - - if (!addit) - goto out; - - if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { + if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { regs->verdict.code = NF_DROP; return; } - count++; -out: + + count = priv->list.count; if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; -- cgit v1.2.3 From 2f971a8f425545da52ca0e6bee81f5b1ea0ccc5f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 28 Dec 2018 01:24:47 +0100 Subject: netfilter: nf_conncount: move all list iterations under spinlock Two CPUs may race to remove a connection from the list, the existing conn->dead will result in a use-after-free. Use the per-list spinlock to protect list iterations. As all accesses to the list now happen while holding the per-list lock, we no longer need to delay free operations with rcu. Joint work with Florian. Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 46 +++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index ce7f7d1212a6..d0fd195b19a8 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -43,8 +43,6 @@ struct nf_conncount_tuple { struct nf_conntrack_zone zone; int cpu; u32 jiffies32; - bool dead; - struct rcu_head rcu_head; }; struct nf_conncount_rb { @@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -static void __conn_free(struct rcu_head *h) -{ - struct nf_conncount_tuple *conn; - - conn = container_of(h, struct nf_conncount_tuple, rcu_head); - kmem_cache_free(conncount_conn_cachep, conn); -} - static bool conn_free(struct nf_conncount_list *list, struct nf_conncount_tuple *conn) { bool free_entry = false; - spin_lock_bh(&list->list_lock); - - if (conn->dead) { - spin_unlock_bh(&list->list_lock); - return free_entry; - } + lockdep_assert_held(&list->list_lock); list->count--; - conn->dead = true; - list_del_rcu(&conn->node); + list_del(&conn->node); if (list->count == 0) { list->dead = true; free_entry = true; } - spin_unlock_bh(&list->list_lock); - call_rcu(&conn->rcu_head, __conn_free); + kmem_cache_free(conncount_conn_cachep, conn); return free_entry; } @@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list) } EXPORT_SYMBOL_GPL(nf_conncount_list_init); -/* Return true if the list is empty */ +/* Return true if the list is empty. Must be called with BH disabled. */ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list) { @@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *net, bool free_entry = false; bool ret = false; + /* don't bother if other cpu is already doing GC */ + if (!spin_trylock(&list->list_lock)) + return false; + list_for_each_entry_safe(conn, conn_n, &list->head, node) { found = find_or_evict(net, list, conn, &free_entry); if (IS_ERR(found)) { if (PTR_ERR(found) == -ENOENT) { - if (free_entry) + if (free_entry) { + spin_unlock(&list->list_lock); return true; + } collected++; } continue; @@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - if (conn_free(list, conn)) + if (conn_free(list, conn)) { + spin_unlock(&list->list_lock); return true; + } collected++; continue; } nf_ct_put(found_ct); if (collected > CONNCOUNT_GC_MAX_NODES) - return false; + break; } - spin_lock_bh(&list->list_lock); if (!list->count) { list->dead = true; ret = true; } - spin_unlock_bh(&list->list_lock); + spin_unlock(&list->list_lock); return ret; } @@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_struct *work) tree = data->gc_tree % CONNCOUNT_SLOTS; root = &data->root[tree]; + local_bh_disable(); rcu_read_lock(); for (node = rb_first(root); node != NULL; node = rb_next(node)) { rbconn = rb_entry(node, struct nf_conncount_rb, node); @@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_struct *work) gc_count++; } rcu_read_unlock(); + local_bh_enable(); + + cond_resched(); spin_lock_bh(&nf_conncount_locks[tree]); if (gc_count < ARRAY_SIZE(gc_nodes)) -- cgit v1.2.3 From c80f10bc973af2ace6b1414724eeff61eaa71837 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 28 Dec 2018 01:24:48 +0100 Subject: netfilter: nf_conncount: speculative garbage collection on empty lists Instead of removing a empty list node that might be reintroduced soon thereafter, tentatively place the empty list node on the list passed to tree_nodes_free(), then re-check if the list is empty again before erasing it from the tree. [ Florian: rebase on top of pending nf_conncount fixes ] Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 1 - net/netfilter/nf_conncount.c | 47 ++++++++++-------------------- 2 files changed, 15 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index aa66775c15f4..f32fc8289473 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -9,7 +9,6 @@ struct nf_conncount_list { spinlock_t list_lock; struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ - bool dead; }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index d0fd195b19a8..f0b05dfebc6e 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -static bool conn_free(struct nf_conncount_list *list, +static void conn_free(struct nf_conncount_list *list, struct nf_conncount_tuple *conn) { - bool free_entry = false; - lockdep_assert_held(&list->list_lock); list->count--; list_del(&conn->node); - if (list->count == 0) { - list->dead = true; - free_entry = true; - } kmem_cache_free(conncount_conn_cachep, conn); - return free_entry; } static const struct nf_conntrack_tuple_hash * find_or_evict(struct net *net, struct nf_conncount_list *list, - struct nf_conncount_tuple *conn, bool *free_entry) + struct nf_conncount_tuple *conn) { const struct nf_conntrack_tuple_hash *found; unsigned long a, b; @@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, */ age = a - b; if (conn->cpu == cpu || age >= 2) { - *free_entry = conn_free(list, conn); + conn_free(list, conn); return ERR_PTR(-ENOENT); } @@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net *net, struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collect = 0; - bool free_entry = false; /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) break; - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { /* Not found, but might be about to be confirmed */ if (PTR_ERR(found) == -EAGAIN) { @@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list) spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); list->count = 0; - list->dead = false; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); @@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collected = 0; - bool free_entry = false; bool ret = false; /* don't bother if other cpu is already doing GC */ @@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *net, return false; list_for_each_entry_safe(conn, conn_n, &list->head, node) { - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { - if (PTR_ERR(found) == -ENOENT) { - if (free_entry) { - spin_unlock(&list->list_lock); - return true; - } + if (PTR_ERR(found) == -ENOENT) collected++; - } continue; } @@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - if (conn_free(list, conn)) { - spin_unlock(&list->list_lock); - return true; - } + conn_free(list, conn); collected++; continue; } @@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *net, break; } - if (!list->count) { - list->dead = true; + if (!list->count) ret = true; - } spin_unlock(&list->list_lock); return ret; @@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h) kmem_cache_free(conncount_rb_cachep, rbconn); } +/* caller must hold tree nf_conncount_locks[] lock */ static void tree_nodes_free(struct rb_root *root, struct nf_conncount_rb *gc_nodes[], unsigned int gc_count) @@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root, while (gc_count) { rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); - rb_erase(&rbconn->node, root); - call_rcu(&rbconn->rcu_head, __tree_nodes_free); + if (!rbconn->list.count) { + rb_erase(&rbconn->node, root); + call_rcu(&rbconn->rcu_head, __tree_nodes_free); + } spin_unlock(&rbconn->list.list_lock); } } @@ -318,7 +301,6 @@ insert_tree(struct net *net, struct rb_root *root, unsigned int hash, const u32 *key, - u8 keylen, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { @@ -327,6 +309,7 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; + u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -454,7 +437,7 @@ count_tree(struct net *net, if (!tuple) return 0; - return insert_tree(net, data, root, hash, key, keylen, tuple, zone); + return insert_tree(net, data, root, hash, key, tuple, zone); } static void tree_gc_worker(struct work_struct *work) -- cgit v1.2.3 From a007232066f6839d6f256bab21e825d968f1a163 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Dec 2018 01:24:49 +0100 Subject: netfilter: nf_conncount: fix argument order to find_next_bit Size and 'next bit' were swapped, this bug could cause worker to reschedule itself even if system was idle. Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search") Reviewed-by: Shawn Bohrer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index f0b05dfebc6e..7554c56b2e63 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -488,7 +488,7 @@ next: clear_bit(tree, data->pending_trees); next_tree = (tree + 1) % CONNCOUNT_SLOTS; - next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); + next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree); if (next_tree < CONNCOUNT_SLOTS) { data->gc_tree = next_tree; -- cgit v1.2.3 From f9fc54d313fab2834f44f516459cdc8ac91d797f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 26 Dec 2018 19:51:46 +0800 Subject: ethtool: check the return value of get_regs_len The return type for get_regs_len in struct ethtool_ops is int, the hns3 driver may return error when failing to get the regs len by sending cmd to firmware. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/core/ethtool.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d05402868575..158264f7cfaf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -793,8 +793,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); + if (ops->get_regs_len) { + int ret = ops->get_regs_len(dev); + + if (ret > 0) + info.regdump_len = ret; + } + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); @@ -1337,6 +1342,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return -EFAULT; reglen = ops->get_regs_len(dev); + if (reglen <= 0) + return reglen; + if (regs.len > reglen) regs.len = reglen; -- cgit v1.2.3 From f989d03ef25df3fc26d3ea0fe7c19c9830577166 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 30 Dec 2018 14:33:20 +0200 Subject: net: rtnetlink: address is mandatory for rtnl_fdb_get We must have an address to lookup otherwise we'll derefence a null pointer in the ndo_fdb_get callbacks. CC: Roopa Prabhu CC: David Ahern Reported-by: syzbot+017b1f61c82a1c3e7efd@syzkaller.appspotmail.com Fixes: 5b2f94b27622 ("net: rtnetlink: support for fdb get") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 48f61885fd6f..5ea1bed08ede 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4104,6 +4104,11 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (!addr) { + NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request"); + return -EINVAL; + } + if (brport_idx) { dev = __dev_get_by_index(net, brport_idx); if (!dev) { -- cgit v1.2.3 From 178fe94405bffbd1acd83b6ff3b40211185ae9c9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 28 Dec 2018 23:28:21 +0100 Subject: net/ipv6: Fix a test against 'ipv6_find_idev()' return value 'ipv6_find_idev()' returns NULL on error, not an error pointer. Update the test accordingly and return -ENOBUFS, as already done in 'addrconf_add_dev()', if NULL is returned. Fixes: ("ipv6: allow userspace to add IFA_F_OPTIMISTIC addresses") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 521e471f1cf9..8eeec6eb2bd3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4736,8 +4736,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) - return PTR_ERR(idev); + if (!idev) + return -ENOBUFS; if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; -- cgit v1.2.3 From 58075ff523af85002bfeace07304d57c59251605 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 29 Dec 2018 14:45:23 +0800 Subject: ipv4: fib_rules: Fix possible infinite loop in fib_empty_table gcc warn this: net/ipv4/fib_rules.c:203 fib_empty_table() warn: always true condition '(id <= 4294967295) => (0-u32max <= u32max)' 'id' is u32, which always not greater than RT_TABLE_MAX (0xFFFFFFFF), So add a check to break while wrap around. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f8eb78d042a4..cfec3af54c8d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -198,11 +198,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct fib_table *fib_empty_table(struct net *net) { - u32 id; + u32 id = 1; - for (id = 1; id <= RT_TABLE_MAX; id++) + while (1) { if (!fib_get_table(net, id)) return fib_new_table(net, id); + + if (id++ == RT_TABLE_MAX) + break; + } return NULL; } -- cgit v1.2.3 From 7f334a7e1ae113212e39aafba51352ea9ab8c9f9 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Sat, 29 Dec 2018 14:07:55 -0500 Subject: ipv6: fix typo in net/ipv6/reassembly.c Signed-off-by: Su Yanjun Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a5bb59ee50ac..36a3d8dc61f5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -210,7 +210,7 @@ found: if (next && next->ip_defrag_offset < end) goto discard_fq; - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ + /* Note : skb->ip_defrag_offset and skb->sk share the same location */ dev = skb->dev; if (dev) fq->iif = dev->ifindex; -- cgit v1.2.3 From c433570458e49bccea5c551df628d058b3526289 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Dec 2018 13:56:36 -0800 Subject: ax25: fix a use-after-free in ax25_fillin_cb() There are multiple issues here: 1. After freeing dev->ax25_ptr, we need to set it to NULL otherwise we may use a dangling pointer. 2. There is a race between ax25_setsockopt() and device notifier as reported by syzbot. Close it by holding RTNL lock. 3. We need to test if dev->ax25_ptr is NULL before using it. Reported-and-tested-by: syzbot+ae6bb869cbed29b29040@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 11 +++++++++-- net/ax25/ax25_dev.c | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index c603d33d5410..5d01edf8d819 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -653,15 +653,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(&init_net, devname); + rtnl_lock(); + dev = __dev_get_by_name(&init_net, devname); if (!dev) { + rtnl_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25->ax25_dev) { + rtnl_unlock(); + res = -ENODEV; + break; + } ax25_fillin_cb(ax25, ax25->ax25_dev); - dev_put(dev); + rtnl_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 9a3a301e1e2f..d92195cd7834 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; @@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; -- cgit v1.2.3 From 7314f5480f3e37e570104dc5e0f28823ef849e72 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Dec 2018 13:56:38 -0800 Subject: netrom: fix locking in nr_find_socket() nr_find_socket(), nr_find_peer() and nr_find_listener() lock the sock after finding it in the global list. However, the call path requires BH disabled for the sock lock consistently. Actually the locking is unnecessary at this point, we can just hold the sock refcnt to make sure it is not gone after we unlock the global list, and lock it later only when needed. Reported-and-tested-by: syzbot+f621cda8b7e598908efa@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 03f37c4e64fe..1d3144d19903 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr) sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { - bh_lock_sock(s); + sock_hold(s); goto found; } s = NULL; @@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, if (nr->your_index == index && nr->your_id == id && !ax25cmp(&nr->dest_addr, dest)) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void) if (i != 0 && j != 0) { if ((sk=nr_find_socket(i, j)) == NULL) break; - bh_unlock_sock(sk); + sock_put(sk); } id++; @@ -920,6 +920,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) } if (sk != NULL) { + bh_lock_sock(sk); skb_reset_transport_header(skb); if (frametype == NR_CONNACK && skb->len == 22) @@ -929,6 +930,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) ret = nr_process_rx_frame(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; } @@ -960,10 +962,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) (make = nr_make_new(sk)) == NULL) { nr_transmit_refusal(skb, 0); if (sk) - bh_unlock_sock(sk); + sock_put(sk); return 0; } + bh_lock_sock(sk); + window = skb->data[20]; skb->sk = make; @@ -1016,6 +1020,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); bh_unlock_sock(sk); + sock_put(sk); nr_insert_socket(make); -- cgit v1.2.3 From 3a0ed3e9619738067214871e9cb826fa23b2ddb9 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 27 Dec 2018 18:55:09 -0800 Subject: sock: Make sock->sk_stamp thread-safe Al Viro mentioned (Message-ID <20170626041334.GZ10672@ZenIV.linux.org.uk>) that there is probably a race condition lurking in accesses of sk_stamp on 32-bit machines. sock->sk_stamp is of type ktime_t which is always an s64. On a 32 bit architecture, we might run into situations of unsafe access as the access to the field becomes non atomic. Use seqlocks for synchronization. This allows us to avoid using spinlocks for readers as readers do not need mutual exclusion. Another approach to solve this is to require sk_lock for all modifications of the timestamps. The current approach allows for timestamps to have their own lock: sk_stamp_lock. This allows for the patch to not compete with already existing critical sections, and side effects are limited to the paths in the patch. The addition of the new field maintains the data locality optimizations from commit 9115e8cd2a0c ("net: reorganize struct sock for better data locality") Note that all the instances of the sk_stamp accesses are either through the ioctl or the syscall recvmsg. Signed-off-by: Deepa Dinamani Signed-off-by: David S. Miller --- include/net/sock.h | 38 +++++++++++++++++++++++++++++++++++--- net/compat.c | 15 +++++++++------ net/core/sock.c | 15 ++++++++++----- net/sunrpc/svcsock.c | 2 +- 4 files changed, 55 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index a6235c286ef9..2b229f7be8eb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -298,6 +298,7 @@ struct sock_common { * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications @@ -474,6 +475,9 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; @@ -2297,6 +2301,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2321,7 +2353,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2342,9 +2374,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) - sk->sk_stamp = 0; + sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); diff --git a/net/compat.c b/net/compat.c index 47a614b370cd..d1f3a8a0b3ef 100644 --- a/net/compat.c +++ b/net/compat.c @@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) ctv = (struct compat_timeval __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); + if (tv.tv_sec == -1) return err; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } err = 0; if (put_user(tv.tv_sec, &ctv->tv_sec) || @@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta ctv = (struct compat_timespec __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return err; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - ts = ktime_to_timespec(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + ts = ktime_to_timespec(kt); } err = 0; if (put_user(ts.tv_sec, &ctv->tv_sec) || diff --git a/net/core/sock.c b/net/core/sock.c index f00902c532cc..6aa2e7e0b4fb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; +#if BITS_PER_LONG==32 + seqlock_init(&sk->sk_stamp_seq); +#endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL @@ -2850,12 +2853,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) struct timeval tv; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); if (tv.tv_sec == -1) return -ENOENT; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; } @@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) struct timespec ts; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return -ENOENT; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); ts = ktime_to_timespec(sk->sk_stamp); } return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 986f3ed7d1a2..b7e67310ec37 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -549,7 +549,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->tstamp; + sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ len = skb->len; -- cgit v1.2.3 From 4087d2bc0d9469835f8d19d63a4a56739e5b8c5b Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 30 Dec 2018 23:24:11 +0800 Subject: net: rds: remove unnecessary NULL check In kfree, the NULL check is done. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- net/rds/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/tcp.c b/net/rds/tcp.c index b9bbcf3d6c63..c16f0a362c32 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -623,7 +623,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net) if (rtn->rds_tcp_sysctl) unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - if (net != &init_net && rtn->ctl_table) + if (net != &init_net) kfree(rtn->ctl_table); } -- cgit v1.2.3 From cb9f1b783850b14cbd7f87d061d784a666dfba1f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 30 Dec 2018 17:24:36 -0500 Subject: ip: validate header length on virtual device xmit KMSAN detected read beyond end of buffer in vti and sit devices when passing truncated packets with PF_PACKET. The issue affects additional ip tunnel devices. Extend commit 76c0ddd8c3a6 ("ip6_tunnel: be careful when accessing the inner header") and commit ccfec9e5cb2d ("ip_tunnel: be careful when accessing the inner header"). Move the check to a separate helper and call at the start of each ndo_start_xmit function in net/ipv4 and net/ipv6. Minor changes: - convert dev_kfree_skb to kfree_skb on error path, as dev_kfree_skb calls consume_skb which is not for error paths. - use pskb_network_may_pull even though that is pedantic here, as the same as pskb_may_pull for devices without llheaders. - do not cache ipv6 hdrs if used only once (unsafe across pskb_may_pull, was more relevant to earlier patch) Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 20 ++++++++++++++++++++ net/ipv4/ip_gre.c | 9 +++++++++ net/ipv4/ip_tunnel.c | 9 --------- net/ipv4/ip_vti.c | 12 +++++++++--- net/ipv6/ip6_gre.c | 10 +++++++--- net/ipv6/ip6_tunnel.c | 10 +++------- net/ipv6/ip6_vti.c | 8 ++++---- net/ipv6/ip6mr.c | 17 +++++++++++------ net/ipv6/sit.c | 3 +++ 9 files changed, 66 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index cbcf35ce1b14..34f019650941 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -308,6 +308,26 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + int nhlen; + + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + default: + nhlen = 0; + } + + return pskb_network_may_pull(skb, nhlen); +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c7a7bd58a23c..d1d09f3e5f9e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -676,6 +676,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -719,6 +722,9 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { erspan_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -762,6 +768,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 284a22154b4e..c4f5602308ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); - unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; - /* ensure we can access the inner net header, for several users below */ - if (skb->protocol == htons(ETH_P_IP)) - inner_nhdr_len = sizeof(struct iphdr); - else if (skb->protocol == htons(ETH_P_IPV6)) - inner_nhdr_len = sizeof(struct ipv6hdr); - if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) - goto tx_error; - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index de31b302d69c..d7b43e700023 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -241,6 +241,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct flowi fl; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { @@ -253,15 +256,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); break; default: - dev->stats.tx_errors++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; + goto tx_err; } /* override mark with tunnel output key */ fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); return vti_xmit(skb, dev, &fl); + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int vti4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 229e55c99021..09d0826742f8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -881,6 +881,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -923,6 +926,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int nhoff; int thoff; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -995,8 +1001,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, goto tx_err; } } else { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - switch (skb->protocol) { case htons(ETH_P_IP): memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1004,7 +1008,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, &dsfield, &encap_limit); break; case htons(ETH_P_IPV6): - if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr)) goto tx_err; if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 99179b9c8384..0c6403cf8b52 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1243,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - /* ensure we can access the full inner ip header */ - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return -1; - iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1321,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - return -1; - ipv6h = ipv6_hdr(skb); tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || @@ -1405,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 706fe42e4928..8b6eefff2f7e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -522,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct ipv6hdr *ipv6h; struct flowi fl; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IPV6): - ipv6h = ipv6_hdr(skb); - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8276f1224f16..30337b38274b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -599,13 +600,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; - int err; - err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; - } + if (!pskb_inet_may_pull(skb)) + goto tx_err; + + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) + goto tx_err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -614,6 +614,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 51c9f75f34b9..1e03305c0549 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1021,6 +1021,9 @@ tx_error: static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); -- cgit v1.2.3 From 7adf3246092f5e87ed0fa610e8088fae416c581f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 2 Jan 2019 13:29:27 +0100 Subject: ipv6: route: Fix return value of ip6_neigh_lookup() on neigh_create() error In ip6_neigh_lookup(), we must not return errors coming from neigh_create(): if creation of a neighbour entry fails, the lookup should return NULL, in the same way as it's done in __neigh_lookup(). Otherwise, callers legitimately checking for a non-NULL return value of the lookup function might dereference an invalid pointer. For instance, on neighbour table overflow, ndisc_router_discovery() crashes ndisc_update() by passing ERR_PTR(-ENOBUFS) as 'neigh' argument. Reported-by: Jianlin Shi Fixes: f8a1b43b709d ("net/ipv6: Create a neigh_lookup for FIB entries") Signed-off-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a94e0b02a8ac..40b225f87d5e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dev); + + n = neigh_create(&nd_tbl, daddr, dev); + return IS_ERR(n) ? NULL : n; } static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, -- cgit v1.2.3 From 73155879b3c1ac3ace35208a54a3a160ec520bef Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Jan 2019 18:26:13 -0800 Subject: ipv6: Fix dump of specific table with strict checking Dump of a specific table with strict checking enabled is looping. The problem is that the end of the table dump is not marked in the cb. When dumping a specific table, cb args 0 and 1 are not used (they are the hash index and entry with an hash table index when dumping all tables). Re-use args[0] to hold a 'done' flag for the specific table dump. Fixes: 13e38901d46ca ("net/ipv6: Plumb support for filtering route dumps") Reported-by: Jakub Kicinski Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ae3786132c23..6613d8dbb0e5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -627,7 +627,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - res = fib6_dump_table(tb, skb, cb); + if (!cb->args[0]) { + res = fib6_dump_table(tb, skb, cb); + if (!res) + cb->args[0] = 1; + } goto out; } -- cgit v1.2.3 From c5ee066333ebc322a24a00a743ed941a0c68617e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Jan 2019 18:57:09 -0800 Subject: ipv6: Consider sk_bound_dev_if when binding a socket to an address IPv6 does not consider if the socket is bound to a device when binding to an address. The result is that a socket can be bound to eth0 and then bound to the address of eth1. If the device is a VRF, the result is that a socket can only be bound to an address in the default VRF. Resolve by considering the device if sk_bound_dev_if is set. This problem exists from the beginning of git history. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0cd291034f0..0bfb6cc0a30a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -350,6 +350,9 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, err = -EINVAL; goto out_unlock; } + } + + if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; -- cgit v1.2.3