From 995b337952cdf7e05d288eede580257b632a8343 Mon Sep 17 00:00:00 2001 From: Thomas Chenault Date: Mon, 18 May 2009 21:43:27 -0700 Subject: net: fix skb_seq_read returning wrong offset/length for page frag data When called with a consumed value that is less than skb_headlen(skb) bytes into a page frag, skb_seq_read() incorrectly returns an offset/length relative to skb->data. Ensure that data which should come from a page frag does. Signed-off-by: Thomas Chenault Tested-by: Shyam Iyer Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d152394b2611..e505b5392e1e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2288,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } -- cgit v1.2.3 From bc8a5397433e4effbaddfa7e462d10b3c060cabb Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Mon, 18 May 2009 21:48:38 -0700 Subject: ipv4: make default for INET_LRO consistent with help text Commit e81963b1 ("ipv4: Make INET_LRO a bool instead of tristate.") changed this config from tristate to bool. Add default so that it is consistent with the help text. Signed-off-by: Frans Pop Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9d26a3da37e5..5b919f7b45db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,7 +408,7 @@ config INET_XFRM_MODE_BEET config INET_LRO bool "Large Receive Offload (ipv4/tcp)" - + default y ---help--- Support for Large Receive Offload (ipv4/tcp). -- cgit v1.2.3 From 88f16db7a2fa63b9242e8a0fbc40d51722f2e2f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 May 2009 12:04:30 +0200 Subject: wext: verify buffer size for SIOCSIWENCODEEXT Another design flaw in wireless extensions (is anybody surprised?) in the way it handles the iw_encode_ext structure: The structure is part of the 'extra' memory but contains the key length explicitly, instead of it just being the length of the extra buffer - size of the struct and using the explicit key length only for the get operation (which only writes it). Therefore, we have this layout: extra: +-------------------------+ | struct iw_encode_ext { | | ... | | u16 key_len; | | u8 key[0]; | | }; | +-------------------------+ | key material | +-------------------------+ Now, all drivers I checked use ext->key_len without checking that both key_len and the struct fit into the extra buffer that has been copied from userspace. This leads to a buffer overrun while reading that buffer, depending on the driver it may be possible to specify arbitrary key_len or it may need to be a proper length for the key algorithm specified. Thankfully, this is only exploitable by root, but root can actually cause a segfault or use kernel memory as a key (which you can even get back with siocgiwencode or siocgiwencodeext from the key buffer). Fix this by verifying that key_len fits into the buffer along with struct iw_encode_ext. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/wext.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d80..0e59f9ae9b81 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); -- cgit v1.2.3 From 5078b2e32ad4b1f753b1c837c15892202f753c97 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 May 2009 17:04:42 -0400 Subject: cfg80211: fix race between core hint and driver's custom apply Its possible for cfg80211 to have scheduled the work and for the global workqueue to not have kicked in prior to a cfg80211 driver's regulatory hint or wiphy_apply_custom_regulatory(). Although this is very unlikely its possible and should fix this race. When this race would happen you are expected to have hit a null pointer dereference panic. Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Tested-by: Alan Jenkins Signed-off-by: John W. Linville --- net/wireless/reg.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08265ca15785..487cb627ddba 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1551,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } -- cgit v1.2.3 From cf8da764fc6959b7efb482f375dfef9830e98205 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 18:54:22 +0000 Subject: net: fix length computation in rt_check_expire() rt_check_expire() computes average and standard deviation of chain lengths, but not correclty reset length to 0 at beginning of each chain. This probably gives overflows for sum2 (and sum) on loaded machines instead of meaningful results. Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068a..869cf1c44b78 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -785,7 +785,7 @@ static void rt_check_expire(void) static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,6 +809,7 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { -- cgit v1.2.3 From 1ddbcb005c395518c2cd0df504cff3d4b5c85853 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 20:14:28 +0000 Subject: net: fix rtable leak in net/ipv4/route.c Alexander V. Lukyanov found a regression in 2.6.29 and made a complete analysis found in http://bugzilla.kernel.org/show_bug.cgi?id=13339 Quoted here because its a perfect one : begin_of_quotation 2.6.29 patch has introduced flexible route cache rebuilding. Unfortunately the patch has at least one critical flaw, and another problem. rt_intern_hash calculates rthi pointer, which is later used for new entry insertion. The same loop calculates cand pointer which is used to clean the list. If the pointers are the same, rtable leak occurs, as first the cand is removed then the new entry is appended to it. This leak leads to unregister_netdevice problem (usage count > 0). Another problem of the patch is that it tries to insert the entries in certain order, to facilitate counting of entries distinct by all but QoS parameters. Unfortunately, referencing an existing rtable entry moves it to list beginning, to speed up further lookups, so the carefully built order is destroyed. For the first problem the simplest patch it to set rthi=0 when rthi==cand, but it will also destroy the ordering. end_of_quotation Problematic commit is 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b (net: implement emergency route cache rebulds when gc_elasticity is exceeded) Trying to keep dst_entries ordered is too complex and breaks the fact that order should depend on the frequency of use for garbage collection. A possible fix is to make rt_intern_hash() simpler, and only makes rt_check_expire() a litle bit smarter, being able to cope with an arbitrary entries order. The added loop is running on cache hot data, while cpu is prefetching next object, so should be unnoticied. Reported-and-analyzed-by: Alexander V. Lukyanov Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 55 +++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 869cf1c44b78..28205e5bfa9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,7 +784,7 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; + struct rtable *rth, *aux, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -812,6 +812,7 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -820,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1069,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1089,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1135,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1206,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1225,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; -- cgit v1.2.3 From 4f72427998b105392e60bae7a6798a0c96fe4f0a Mon Sep 17 00:00:00 2001 From: Jean-Mickael Guerin Date: Wed, 20 May 2009 17:38:59 -0700 Subject: IPv6: set RTPROT_KERNEL to initial route The use of unspecified protocol in IPv6 initial route prevents quagga to install IPv6 default route: # show ipv6 route S ::/0 [1/0] via fe80::1, eth1_0 K>* ::/0 is directly connected, lo, rej C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto none metric -1 error -101 hoplimit 255 The attached patch ensures RTPROT_KERNEL to the default initial route and fixes the problem for quagga. This is similar to "ipv6: protocol for address routes" f410a1fba7afa79d2992620e874a343fdba28332. # show ipv6 route S>* ::/0 [1/0] via fe80::1, eth1_0 C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 default via fe80::1 dev eth1_0 proto zebra metric 1024 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto kernel metric -1 error -101 hoplimit 255 Signed-off-by: Jean-Mickael Guerin Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35c..032a5ec391c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; -- cgit v1.2.3 From 5b5f792a6a9a2f9ae812d151ed621f72e99b1725 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 May 2009 15:07:12 -0700 Subject: pktgen: do not access flows[] beyond its length typo -- pkt_dev->nflows is for stats only, the number of concurrent flows is stored in cflows. Reported-By: Vladimir Ivashchenko Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c11..0666a827bc62 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); -- cgit v1.2.3 From 3ed18d76d959e5cbfa5d70c8f7ba95476582a556 Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 21 May 2009 15:20:59 -0700 Subject: ipv4: Fix oops with FIB_TRIE It seems we can fix this by disabling preemption while we re-balance the trie. This is with the CONFIG_CLASSIC_RCU. It's been stress-tested at high loads continuesly taking a full BGP table up/down via iproute -batch. Note. fib_trie is not updated for CONFIG_PREEMPT_RCU Reported-by: Andrei Popa Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b6..33c7c85dfe40 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } -- cgit v1.2.3 From 0975ecba3b670df7c488a5e0e6fe9f1f370a8ad8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 May 2009 15:22:02 -0700 Subject: RxRPC: Error handling for rxrpc_alloc_connection() rxrpc_alloc_connection() doesn't return an error code on failure, it just returns NULL. IS_ERR(NULL) is false. Signed-off-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-connection.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d289..67e38a056240 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; -- cgit v1.2.3