From c316e6a3084cef1a5857cd66bb5429c969f06c93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2011 00:37:35 +0000 Subject: get_net_ns_by_fd() oopses if proc_ns_fget() returns an error BTW, looking through the code related to struct net lifetime rules has caught something else: struct net *get_net_ns_by_fd(int fd) { ... file = proc_ns_fget(fd); if (!file) goto out; ei = PROC_I(file->f_dentry->d_inode); while in proc_ns_fget() we have two return ERR_PTR(...) and not a single path that would return NULL. The other caller of proc_ns_fget() treats ERR_PTR() correctly... Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/core/net_namespace.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..e41e5110c65c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -310,19 +310,17 @@ struct net *get_net_ns_by_fd(int fd) struct file *file; struct net *net; - net = ERR_PTR(-EINVAL); file = proc_ns_fget(fd); - if (!file) - goto out; + if (IS_ERR(file)) + return ERR_CAST(file); ei = PROC_I(file->f_dentry->d_inode); - if (ei->ns_ops != &netns_operations) - goto out; + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); - net = get_net(ei->ns); -out: - if (file) - fput(file); + fput(file); return net; } -- cgit v1.2.3 From b8f07a063163f8216cd891c5b007e839a56b6d93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2011 00:54:03 +0000 Subject: fix return values of l2tp_dfs_seq_open() More fallout from struct net lifetime rules review: PTR_ERR() is *already* negative and failing ->open() should return negatives on failure. Signed-off-by: Al Viro Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8dbae82fab8..76130134bfa6 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file) */ pd->net = get_net_ns_by_pid(current->pid); if (IS_ERR(pd->net)) { - rc = -PTR_ERR(pd->net); + rc = PTR_ERR(pd->net); goto err_free_pd; } -- cgit v1.2.3 From fb04883371f2cb7867d24783e7d590036dc9b548 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 15:44:27 +0200 Subject: netfilter: add more values to enum ip_conntrack_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following error is raised (and other similar ones) : net/ipv4/netfilter/nf_nat_standalone.c: In function ‘nf_nat_fn’: net/ipv4/netfilter/nf_nat_standalone.c:119:2: warning: case value ‘4’ not in enumerated type ‘enum ip_conntrack_info’ gcc barfs on adding two enum values and getting a not enumerated result : case IP_CT_RELATED+IP_CT_IS_REPLY: Add missing enum values Signed-off-by: Eric Dumazet CC: David Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 3 +++ net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 10 ++++------ net/netfilter/nf_conntrack_irc.c | 3 +-- net/netfilter/nf_conntrack_pptp.c | 3 +-- net/netfilter/nf_conntrack_sane.c | 2 +- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/xt_socket.c | 4 ++-- 16 files changed, 26 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 50cdc2559a5a..0d3dd66322ec 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -18,6 +18,9 @@ enum ip_conntrack_info { /* >= this indicates reply direction */ IP_CT_IS_REPLY, + IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, + IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, + IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, /* Number of distinct IP_CT types (no NEW in reply dirn). */ IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3cb9a4..5c9e97c79017 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc74ebc..9931152a78b5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02af999..db10075dd88e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b2755ce3..3346de5d94d0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c30426480b..733c9abc1cbd 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf1d457..483b76d042da 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b22562..4111050a9fc5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f78419..0bd568929403 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -1143,7 +1143,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c7dd8f..6f5801eac999 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5c8ced..f03c2d4539f6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa318776..4f9390b98697 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 088944824e13..2fd4565144de 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e27734b2a2..8501823b3f9b 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a28581782..93faf6a3a637 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc46356b577..fe39f7e913df 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; -- cgit v1.2.3 From afb523c54718da57ff661950bd3287ec9eeb66bd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 2 Jun 2011 09:09:54 +0900 Subject: ipvs: restore support for iptables SNAT Fix the IPVS priority in LOCAL_IN hook, so that SNAT target in POSTROUTING is supported for IPVS traffic as in 2.6.36 where it worked depending on module load order. Before 2.6.37 we used priority 100 in LOCAL_IN to process remote requests. We used the same priority as iptables SNAT and if IPVS handlers are installed before SNAT handlers we supported SNAT in POSTROUTING for the IPVS traffic. If SNAT is installed before IPVS, the netfilter handlers are before IPVS and netfilter checks the NAT table twice for the IPVS requests: once in LOCAL_IN where IPS_SRC_NAT_DONE is set and second time in POSTROUTING where the SNAT rules are ignored because IPS_SRC_NAT_DONE was already set in LOCAL_IN. But in 2.6.37 we changed the IPVS priority for LOCAL_IN with the goal to be unique (101) forgetting the fact that for IPVS traffic we should not walk both LOCAL_IN and POSTROUTING nat tables. So, change the priority for processing remote IPVS requests from 101 to 99, i.e. before NAT_SRC (100) because we prefer to support SNAT in POSTROUTING instead of LOCAL_IN. It also moves the priority for IPVS replies from 99 to 98. Use constants instead of magic numbers at these places. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bfa808f4da13..55af2242bccd 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1772,7 +1772,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1782,7 +1782,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1790,7 +1790,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1798,7 +1798,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1824,7 +1824,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP6_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1834,7 +1834,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1842,7 +1842,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1850,7 +1850,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP6_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ -- cgit v1.2.3 From fcbf12817100d23890832801507107718a1fa448 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 Jun 2011 23:35:48 +0200 Subject: netfilter: ipset: Fix return code for destroy when sets are in use Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8041befc6555..42aa64b6b0b1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -767,7 +767,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { - ret = IPSET_ERR_BUSY; + ret = -IPSET_ERR_BUSY; goto out; } } -- cgit v1.2.3 From b48e3c5c323fea08c12a340cbb8dcc8ca2431d5b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 Jun 2011 23:35:49 +0200 Subject: netfilter: ipset: Use the stored first cidr value instead of '1' The stored cidr values are tried one after anoter. The boolean condition evaluated to '1' instead of the first stored cidr or the default host cidr. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 10 ++++++---- net/netfilter/ipset/ip_set_hash_net.c | 8 ++++++-- net/netfilter/ipset/ip_set_hash_netport.c | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 4743e5402522..565a7c5b8818 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -146,8 +146,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet4_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -394,8 +395,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c4db202b7da4..2aeeabcd5a21 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -131,7 +131,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -296,7 +298,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d2a40362dd3a..e50d9bb8820b 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -144,7 +144,8 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -357,7 +358,8 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; -- cgit v1.2.3 From d9be76f38526dccf84062e3ac3ed3a6a97698565 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 29 May 2011 23:42:29 +0300 Subject: netfilter: nf_nat: fix crash in nf_nat_csum Fix crash in nf_nat_csum when mangling packets in OUTPUT hook where skb->dev is not defined, it is set later before POSTROUTING. Problem happens for CHECKSUM_NONE. We can check device from rt but using CHECKSUM_PARTIAL should be safe (skb_checksum_help). Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 99cfa28b6d38..ebc5f8894f99 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + -- cgit v1.2.3 From 88ed01d17b44bc2bed4ad4835d3b1099bff3dd71 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 2 Jun 2011 15:08:45 +0200 Subject: netfilter: nf_conntrack: fix ct refcount leak in l4proto->error() This patch fixes a refcount leak of ct objects that may occur if l4proto->error() assigns one conntrack object to one skbuff. In that case, we have to skip further processing in nf_conntrack_in(). With this patch, we can also fix wrong return values (-NF_ACCEPT) for special cases in ICMP[v6] that should not bump the invalid/error statistic counters. Reported-by: Zoltan Menyhart Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7404bde95994..ab5b27a2916f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } /* Small and modified version of icmp_rcv */ diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1df3c8b6bf47..7c05e7eacbc6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -177,7 +177,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } static int diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0bd568929403..f7af8b866017 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -922,6 +922,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = -ret; goto out; } + /* ICMP[v6] protocol trackers may assign one conntrack. */ + if (skb->nfct) + goto out; } ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, -- cgit v1.2.3 From d232b8dded624af3e346b13807a591c63b601c44 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 27 May 2011 20:36:51 -0400 Subject: netfilter: use unsigned variables for packet lengths in ip[6]_queue. Netlink message lengths can't be negative, so use unsigned variables. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d2c1311cb28d..f7f9bd7ba12d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -402,7 +402,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 413ab0754e1f..065fe405fb58 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -403,7 +403,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; -- cgit v1.2.3 From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an nunnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 19 ++++++------------- include/linux/sunrpc/gss_krb5_enctypes.h | 4 ++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'net') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34e..2b1449dd2f49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -189,18 +190,10 @@ static struct file_operations export_features_operations = { .release = single_release, }; -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) { - struct gss_api_mech *k5mech; - - k5mech = gss_mech_get_by_name("krb5"); - if (k5mech == NULL) - goto out; - if (k5mech->gm_upcall_enctypes != NULL) - seq_printf(m, k5mech->gm_upcall_enctypes); - gss_mech_put(k5mech); -out: + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); return 0; } @@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = { .llseek = seq_lseek, .release = single_release, }; -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 000000000000..ec6234eee89c --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e469..c3b75333b821 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) -- cgit v1.2.3 From 5a079c305ad4dda9708b7a29db4a8bd38e21c3a6 Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Mon, 6 Jun 2011 06:00:07 +0000 Subject: net/ipv6: check for mistakenly passed in non-AF_INET6 sockaddrs Same check as for IPv4, also do for IPv6. (If you passed in a IPv4 sockaddr_in here, the sizeof check in the line before would have triggered already though.) Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b7919f901fbf..d450a2f9fc06 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -272,6 +272,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) return -EINVAL; -- cgit v1.2.3 From 3019de124b9f5b1526cb3668b74af14371e21795 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 16:41:33 -0700 Subject: net: Rework netdev_drivername() to avoid warning. This interface uses a temporary buffer, but for no real reason. And now can generate warnings like: net/sched/sch_generic.c: In function dev_watchdog net/sched/sch_generic.c:254:10: warning: unused variable drivername Just return driver->name directly or "". Reported-by: Connor Hansen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 16 +++++----------- net/sched/sch_generic.c | 3 +-- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca333e79e10f..54b8b4d7b68f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2555,7 +2555,7 @@ extern void netdev_class_remove_file(struct class_attribute *class_attr); extern struct kobj_ns_type_operations net_ns_type_operations; -extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); +extern const char *netdev_drivername(const struct net_device *dev); extern void linkwatch_run_queue(void); diff --git a/net/core/dev.c b/net/core/dev.c index 939307891e71..1af6cb27f67a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d71c27c..b4c680900d7a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3 From 79b3891587741dfac72cdfead1f2764b56a567b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 17:00:35 -0700 Subject: irda: iriap: Use seperate lockdep class for irias_objects->hb_spinlock The SEQ output functions grab the obj->attrib->hb_spinlock lock of sub-objects found in the hash traversal. These locks are in a different realm than the one used for the irias_objects hash table itself. So put the latter into it's own lockdep class. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/irda/iriap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 36477538cea8..f876eed7d4aa 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -87,6 +87,8 @@ static inline void iriap_start_watchdog_timer(struct iriap_cb *self, iriap_watchdog_timer_expired); } +static struct lock_class_key irias_objects_key; + /* * Function iriap_init (void) * @@ -114,6 +116,9 @@ int __init iriap_init(void) return -ENOMEM; } + lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key, + "irias_objects"); + /* * Register some default services for IrLMP */ -- cgit v1.2.3 From 13fcb7bd322164c67926ffe272846d4860196dc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2011 22:42:06 -0700 Subject: af_packet: prevent information leak In 2.6.27, commit 393e52e33c6c2 (packet: deliver VLAN TCI to userspace) added a small information leak. Add padding field and make sure its zeroed before copy to user. Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 6d66ce1791a9..7b318630139f 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -62,6 +62,7 @@ struct tpacket_auxdata { __u16 tp_mac; __u16 tp_net; __u16 tp_vlan_tci; + __u16 tp_padding; }; /* Rx ring - header status */ @@ -101,6 +102,7 @@ struct tpacket2_hdr { __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; + __u16 tp_padding; }; #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba248d93399a..c0c3cda19712 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -804,6 +804,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } else { h.h2->tp_vlan_tci = 0; } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -1736,6 +1737,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, } else { aux.tp_vlan_tci = 0; } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3 From 6407d74c5106bb362b4087693688afd34942b094 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Tue, 7 Jun 2011 00:51:35 -0700 Subject: bridge: provide a cow_metrics method for fake_ops Like in commit 0972ddb237 (provide cow_metrics() methods to blackhole dst_ops), we must provide a cow_metrics for bridges fake_dst_ops as well. This fixes a regression coming from commits 62fa8a846d7d (net: Implement read-only protection and COW'ing of metrics.) and 33eb9873a28 (bridge: initialize fake_rtable metrics) ip link set mybridge mtu 1234 --> [ 136.546243] Pid: 8415, comm: ip Tainted: P 2.6.39.1-00006-g40545b7 #103 ASUSTeK Computer Inc. V1Sn /V1Sn [ 136.546256] EIP: 0060:[<00000000>] EFLAGS: 00010202 CPU: 0 [ 136.546268] EIP is at 0x0 [ 136.546273] EAX: f14a389c EBX: 000005d4 ECX: f80d32c0 EDX: f80d1da1 [ 136.546279] ESI: f14a3000 EDI: f255bf10 EBP: f15c3b54 ESP: f15c3b48 [ 136.546285] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 136.546293] Process ip (pid: 8415, ti=f15c2000 task=f4741f80 task.ti=f15c2000) [ 136.546297] Stack: [ 136.546301] f80c658f f14a3000 ffffffed f15c3b64 c12cb9c8 f80d1b80 ffffffa1 f15c3bbc [ 136.546315] c12da347 c12d9c7d 00000000 f7670b00 00000000 f80d1b80 ffffffa6 f15c3be4 [ 136.546329] 00000004 f14a3000 f255bf20 00000008 f15c3bbc c11d6cae 00000000 00000000 [ 136.546343] Call Trace: [ 136.546359] [] ? br_change_mtu+0x5f/0x80 [bridge] [ 136.546372] [] dev_set_mtu+0x38/0x80 [ 136.546381] [] do_setlink+0x1a7/0x860 [ 136.546390] [] ? rtnl_fill_ifinfo+0x9bd/0xc70 [ 136.546400] [] ? nla_parse+0x6e/0xb0 [ 136.546409] [] rtnl_newlink+0x361/0x510 [ 136.546420] [] ? vmalloc_sync_all+0x100/0x100 [ 136.546429] [] ? error_code+0x5a/0x60 [ 136.546438] [] ? rtnl_configure_link+0x80/0x80 [ 136.546446] [] rtnetlink_rcv_msg+0xfa/0x210 [ 136.546454] [] ? __rtnl_unlock+0x20/0x20 [ 136.546463] [] netlink_rcv_skb+0x8e/0xb0 [ 136.546471] [] rtnetlink_rcv+0x1c/0x30 [ 136.546479] [] netlink_unicast+0x23a/0x280 [ 136.546487] [] netlink_sendmsg+0x26b/0x2f0 [ 136.546497] [] sock_sendmsg+0xc8/0x100 [ 136.546508] [] ? __alloc_pages_nodemask+0xe1/0x750 [ 136.546517] [] ? _copy_from_user+0x42/0x60 [ 136.546525] [] ? verify_iovec+0x4c/0xc0 [ 136.546534] [] sys_sendmsg+0x1c5/0x200 [ 136.546542] [] ? __do_fault+0x310/0x410 [ 136.546549] [] ? do_wp_page+0x1d6/0x6b0 [ 136.546557] [] ? handle_pte_fault+0xe1/0x720 [ 136.546565] [] ? sys_getsockname+0x7f/0x90 [ 136.546574] [] ? handle_mm_fault+0xb1/0x180 [ 136.546582] [] ? vmalloc_sync_all+0x100/0x100 [ 136.546589] [] ? do_page_fault+0x173/0x3d0 [ 136.546596] [] ? sys_recvmsg+0x3b/0x60 [ 136.546605] [] sys_socketcall+0x293/0x2d0 [ 136.546614] [] sysenter_do_call+0x12/0x26 [ 136.546619] Code: Bad EIP value. [ 136.546627] EIP: [<00000000>] 0x0 SS:ESP 0068:f15c3b48 [ 136.546645] CR2: 0000000000000000 [ 136.546652] ---[ end trace 6909b560e78934fa ]--- Signed-off-by: Alexander Holler Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3fa123185e89..56149ec36d7f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -104,10 +104,16 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, + .cow_metrics = fake_cow_metrics, }; /* -- cgit v1.2.3 From 264524d5e5195f6e0f099bee20253a22b651e272 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jun 2011 20:50:03 +0000 Subject: net: cpu offline cause napi stall Frank Blaschka reported : During heavy network load we turn off/on cpus. Sometimes this causes a stall on the network device. Digging into the dump I found out following: napi is scheduled but does not run. From the I/O buffers and the napi state I see napi/rx_softirq processing has stopped because the budget was reached. napi stays in the softnet_data poll_list and the rx_softirq was raised again. I assume at this time the cpu offline comes in, the rx softirq is raised/moved to another cpu but napi stays in the poll_list of the softnet_data of the now offline cpu. Reviewing dev_cpu_callback (net/core/dev.c) I did not find the poll_list is transfered to the new cpu. This patch is a straightforward implementation of Frank suggestion : Transfert poll_list and trigger NET_RX_SOFTIRQ on new cpu. Reported-by: Frank Blaschka Signed-off-by: Heiko Carstens Signed-off-by: Eric Dumazet Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1af6cb27f67a..a54c9f87ddbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6178,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); -- cgit v1.2.3 From bb77f6341728624314f488ebd8b4c69f2caa33ea Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 7 Jun 2011 14:03:08 -0400 Subject: Revert "mac80211: stop queues before rate control updation" This reverts commit 1d38c16ce4156f63b45abbd09dd28ca2ef5172b4. The mac80211 maintainer raised complaints about abuse of the CSA stop reason, and about whether this patch actually serves its intended purpose at all. Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 456cccf26b51..d595265d6c22 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -232,9 +232,6 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -248,9 +245,6 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ -- cgit v1.2.3 From 57a27e1d6a3bb9ad4efeebd3a8c71156d6207536 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 7 Jun 2011 20:42:26 +0300 Subject: nl80211: fix overflow in ssid_len When one of the SSID's length passed in a scan or sched_scan request is larger than 255, there will be an overflow in the u8 that is used to store the length before checking. This causes the check to fail and we overrun the buffer when copying the SSID. Fix this by checking the nl80211 attribute length before copying it to the struct. This is a follow up for the previous commit 208c72f4fe44fe09577e7975ba0e7fa0278f3d03, which didn't fix the problem entirely. Reported-by: Ido Yariv Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88a565f130a5..98fa8eb6cc4b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3406,11 +3406,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i = 0; if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { - request->ssids[i].ssid_len = nla_len(attr); - if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { + if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } + request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; } @@ -3572,12 +3572,11 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { - request->ssids[i].ssid_len = nla_len(attr); - if (request->ssids[i].ssid_len > - IEEE80211_MAX_SSID_LEN) { + if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } + request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; -- cgit v1.2.3 From e756682c8baa47da1648c0c016e9f48ed66bc32d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 5 Jun 2011 20:46:03 +0000 Subject: xfrm: Fix off by one in the replay advance functions We may write 4 byte too much when we reinitialize the anti replay window in the replay advance functions. This patch fixes this by adjusting the last index of the initialization loop. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_replay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 47f1b8638df9..b11ea692bd7d 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -265,7 +265,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; @@ -471,7 +471,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; -- cgit v1.2.3 From 2584547230ae49b8de91ab3bb5e0a81898905b45 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Jun 2011 09:37:09 -0700 Subject: ceph: fix sync vs canceled write If we cancel a write, trigger the safe completions to prevent a sync from blocking indefinitely in ceph_osdc_sync(). Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6ea2b892f44b..9cb627a4073a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1144,6 +1144,13 @@ static void handle_osds_timeout(struct work_struct *work) round_jiffies_relative(delay)); } +static void complete_request(struct ceph_osd_request *req) +{ + if (req->r_safe_callback) + req->r_safe_callback(req, NULL); + complete_all(&req->r_safe_completion); /* fsync waiter */ +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1226,11 +1233,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, else complete_all(&req->r_completion); - if (flags & CEPH_OSD_FLAG_ONDISK) { - if (req->r_safe_callback) - req->r_safe_callback(req, msg); - complete_all(&req->r_safe_completion); /* fsync waiter */ - } + if (flags & CEPH_OSD_FLAG_ONDISK) + complete_request(req); done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1732,6 +1736,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, __cancel_request(req); __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + complete_request(req); dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } -- cgit v1.2.3 From f3209bea110cade12e2b133da8b8499689cb0e2e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jun 2011 13:27:29 +0200 Subject: mac80211: fix IBSS teardown race Ignacy reports that sometimes after leaving an IBSS joining a new one didn't work because there still were stations on the list. He fixed it by flushing stations when attempting to join a new IBSS, but this shouldn't be happening in the first case. When I looked into it I saw a race condition in teardown that could cause stations to be added after flush, and thus cause this situation. Ignacy confirms that after applying my patch he hasn't seen this happen again. Reported-by: Ignacy Gawedzki Debugged-by: Ignacy Gawedzki Tested-by: Ignacy Gawedzki Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 421eaa6b0c2b..56c24cabf26d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -965,6 +965,10 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->u.ibss.mtx); + sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; + memset(sdata->u.ibss.bssid, 0, ETH_ALEN); + sdata->u.ibss.ssid_len = 0; + active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -999,8 +1003,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree_skb(skb); skb_queue_purge(&sdata->skb_queue); - memset(sdata->u.ibss.bssid, 0, ETH_ALEN); - sdata->u.ibss.ssid_len = 0; del_timer_sync(&sdata->u.ibss.timer); -- cgit v1.2.3 From fe6fe792faec3fc2d2db39b69651682b8c4e7fcb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2011 06:07:07 +0000 Subject: net: pmtu_expires fixes commit 2c8cec5c10bc (ipv4: Cache learned PMTU information in inetpeer) added some racy peer->pmtu_expires accesses. As its value can be changed by another cpu/thread, we should be more careful, reading its value once. Add peer_pmtu_expired() and peer_pmtu_cleaned() helpers Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 78 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b0b956508b..045f0ec6a4a0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1316,6 +1316,23 @@ reject_redirect: ; } +static bool peer_pmtu_expired(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + time_after_eq(jiffies, orig) && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + +static bool peer_pmtu_cleaned(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && - rt->peer->pmtu_expires && - time_after_eq(jiffies, rt->peer->pmtu_expires)) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(dst, RTAX_MTU, - rt->peer->pmtu_orig); + } else if (rt->peer && peer_pmtu_expired(rt->peer)) { + dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); } } return ret; @@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { - unsigned long expires = peer->pmtu_expires; + unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); + if (!expires) + return; if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { @@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt_bind_peer(rt, rt->rt_dst, 1); peer = rt->peer; if (peer) { + unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; + if (!pmtu_expires || mtu < peer->pmtu_learned) { pmtu_expires = jiffies + ip_rt_mtu_expires; if (!pmtu_expires) @@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) rt_bind_peer(rt, rt->rt_dst, 0); peer = rt->peer; - if (peer && peer->pmtu_expires) + if (peer) { check_peer_pmtu(dst, peer); - if (peer && peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } } rt->rt_peer_genid = rt_peer_genid(); @@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && - rt->peer && - rt->peer->pmtu_expires) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); - } + if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); } static int ip_rt_bug(struct sk_buff *skb) @@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - if (peer->pmtu_expires) - check_peer_pmtu(&rt->dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -2775,7 +2783,8 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires; + long expires = 0; + const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2823,15 +2832,16 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = (rt->peer && rt->peer->pmtu_expires) ? - rt->peer->pmtu_expires - jiffies : 0; - if (rt->peer) { + if (peer) { inet_peer_refcheck(rt->peer); - id = atomic_read(&rt->peer->ip_id_count) & 0xffff; - if (rt->peer->tcp_ts_stamp) { - ts = rt->peer->tcp_ts; - tsage = get_seconds() - rt->peer->tcp_ts_stamp; + id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; } + expires = ACCESS_ONCE(peer->pmtu_expires); + if (expires) + expires -= jiffies; } if (rt_is_input_route(rt)) { -- cgit v1.2.3 From 0c1ad04aecb975f2a2014e1bc5a2fa23923ecbd9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jun 2011 00:28:13 -0700 Subject: netpoll: prevent netpoll setup on slave devices In commit 8d8fc29d02a33e4bd5f4fa47823c1fd386346093 (netpoll: disable netpoll when enslave a device), we automatically disable netpoll when the underlying device is being enslaved, we also need to prevent people from setuping netpoll on devices that are already enslaved. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2d7d6d473781..42ea4b0e59f1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -792,6 +792,12 @@ int netpoll_setup(struct netpoll *np) return -ENODEV; } + if (ndev->master) { + printk(KERN_ERR "%s: %s is a slave device, aborting.\n", + np->name, np->dev_name); + return -EBUSY; + } + if (!netif_running(ndev)) { unsigned long atmost, atleast; -- cgit v1.2.3 From 8d03e971cf403305217b8e62db3a2e5ad2d6263f Mon Sep 17 00:00:00 2001 From: Filip Palian Date: Thu, 12 May 2011 19:32:46 +0200 Subject: Bluetooth: l2cap and rfcomm: fix 1 byte infoleak to userspace. Structures "l2cap_conninfo" and "rfcomm_conninfo" have one padding byte each. This byte in "cinfo" is copied to userspace uninitialized. Signed-off-by: Filip Palian Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 18dc9888d8c2..8248303f44e8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -413,6 +413,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; } + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 386cfaffd4b7..1b10727ce523 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -788,6 +788,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); -- cgit v1.2.3 From 96d7303e9cfb6a9bc664174a4dfdb6fa689284fe Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 5 Jun 2011 20:48:47 +0000 Subject: ipv4: Fix packet size calculation for raw IPsec packets in __ip_append_data We assume that transhdrlen is positive on the first fragment which is wrong for raw packets. So we don't add exthdrlen to the packet size for raw packets. This leads to a reallocation on IPsec because we have not enough headroom on the skb to place the IPsec headers. This patch fixes this by adding exthdrlen to the packet size whenever the send queue of the socket is empty. This issue was introduced with git commit 1470ddf7 (inet: Remove explicit write references to sk/inet in ip_append_data) Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 98af3697c718..a8024eaa0e87 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -799,7 +799,9 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; - exthdrlen = transhdrlen ? rt->dst.header_len : 0; + skb = skb_peek_tail(queue); + + exthdrlen = !skb ? rt->dst.header_len : 0; length += exthdrlen; transhdrlen += exthdrlen; mtu = cork->fragsize; @@ -825,8 +827,6 @@ static int __ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(queue); - cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && -- cgit v1.2.3 From 0da67bed835fdde68ca0e924d2a2d6ac82c70833 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 May 2011 11:13:15 +0300 Subject: Bluetooth: fix shutdown on SCO sockets shutdown should wait for SCO link to be properly disconnected before detroying the socket, otherwise an application using the socket may assume link is properly disconnected before it really happens which can be a problem when e.g synchronizing profile switch. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 42fdffd1d76c..cb4fb7837e5c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -369,6 +369,15 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: + if (sco_pi(sk)->conn) { + sk->sk_state = BT_DISCONN; + sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + hci_conn_put(sco_pi(sk)->conn->hcon); + sco_pi(sk)->conn->hcon = NULL; + } else + sco_chan_del(sk, ECONNRESET); + break; + case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); @@ -819,7 +828,9 @@ static void sco_chan_del(struct sock *sk, int err) conn->sk = NULL; sco_pi(sk)->conn = NULL; sco_conn_unlock(conn); - hci_conn_put(conn->hcon); + + if (conn->hcon) + hci_conn_put(conn->hcon); } sk->sk_state = BT_CLOSED; -- cgit v1.2.3 From 7f4f0572df6c8eaa6a587bc212b0806ff37380dd Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Fri, 27 May 2011 11:16:21 +0300 Subject: Bluetooth: Do not send SET_EVENT_MASK for 1.1 and earlier devices Some old hci controllers do not accept any mask so leave the default mask on for these devices. < HCI Command: Set Event Mask (0x03|0x0001) plen 8 Mask: 0xfffffbff00000000 > HCI Event: Command Complete (0x0e) plen 4 Set Event Mask (0x03|0x0001) ncmd 1 status 0x12 Error: Invalid HCI Command Parameters Signed-off-by: Ville Tervo Tested-by: Corey Boyle Tested-by: Ed Tomlinson Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f13ddbf858ba..77930aa522e3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -477,14 +477,16 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * command otherwise */ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - /* Events for 1.2 and newer controllers */ - if (hdev->lmp_ver > 1) { - events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ - } + /* CSR 1.1 dongles does not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices */ + if (hdev->lmp_ver <= 1) + return; + + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x04; /* Inquiry Result with RSSI */ -- cgit v1.2.3 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header accelleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decided that we need to increment rx_stats for a multicast packet. Hopefully at somepoint we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 25 ++++++++++++++++++--- include/linux/skbuff.h | 5 +++++ net/8021q/vlan_core.c | 60 +++++++++++++++++++++++++++---------------------- net/core/dev.c | 2 +- 4 files changed, 61 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index dc01681fbb42..affa27380b72 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -225,7 +225,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, } /** - * __vlan_put_tag - regular VLAN tag inserting + * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_tci: VLAN TCI to insert * @@ -234,8 +234,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -255,8 +257,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); - skb->protocol = htons(ETH_P_8021Q); + return skb; +} +/** + * __vlan_put_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +{ + skb = vlan_insert_tag(skb, vlan_tci); + if (skb) + skb->protocol = htons(ETH_P_8021Q); return skb; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8b78ce14474..c0a4f3ab0cc0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1256,6 +1256,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +static inline void skb_reset_mac_len(struct sk_buff *skb) +{ + skb->mac_len = skb->network_header - skb->mac_header; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c9..fcc684678af6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddbb..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3 From 83fe32de63e60af34fa8dae83716cb13b8677abd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 11 Jun 2011 18:55:22 -0700 Subject: netpoll: call dev_put() on error in netpoll_setup() There is a dev_put(ndev) missing on an error path. This was introduced in 0c1ad04aecb "netpoll: prevent netpoll setup on slave devices". Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 42ea4b0e59f1..18d9cbda3a39 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -795,7 +795,8 @@ int netpoll_setup(struct netpoll *np) if (ndev->master) { printk(KERN_ERR "%s: %s is a slave device, aborting.\n", np->name, np->dev_name); - return -EBUSY; + err = -EBUSY; + goto put; } if (!netif_running(ndev)) { -- cgit v1.2.3 From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: Delay struct net freeing while there's a sysfs instance refering to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logics has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance refering to that struct net is actual freeing of memory occupied by struct net. Signed-off-by: Al Viro --- fs/sysfs/mount.c | 37 +++++++++++-------------------------- fs/sysfs/sysfs.h | 2 +- include/linux/kobject_ns.h | 10 ++++++---- include/linux/sysfs.h | 7 ------- include/net/net_namespace.h | 10 +++++++++- lib/kobject.c | 26 +++++++++----------------- net/core/net-sysfs.c | 23 +++++++++-------------- net/core/net_namespace.c | 12 ++++++++++-- 8 files changed, 55 insertions(+), 72 deletions(-) (limited to 'net') diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b47..e34f0d99ea4e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) return error; } +static void free_sysfs_super_info(struct sysfs_super_info *info) +{ + int type; + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) + kobj_ns_drop(type, info->ns[type]); + kfree(info); +} + static struct dentry *sysfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, return ERR_PTR(-ENOMEM); for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - info->ns[type] = kobj_ns_current(type); + info->ns[type] = kobj_ns_grab_current(type); sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { @@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, static void sysfs_kill_sb(struct super_block *sb) { struct sysfs_super_info *info = sysfs_info(sb); - /* Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing sysfs_super_info. */ kill_anon_super(sb); - kfree(info); + free_sysfs_super_info(info); } static struct file_system_type sysfs_fs_type = { @@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { .kill_sb = sysfs_kill_sb, }; -void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) -{ - struct super_block *sb; - - mutex_lock(&sysfs_mutex); - spin_lock(&sb_lock); - list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { - struct sysfs_super_info *info = sysfs_info(sb); - /* - * If we see a superblock on the fs_supers/s_instances - * list the unmount has not completed and sb->s_fs_info - * points to a valid struct sysfs_super_info. - */ - /* Ignore superblocks with the wrong ns */ - if (info->ns[type] != ns) - continue; - info->ns[type] = NULL; - } - spin_unlock(&sb_lock); - mutex_unlock(&sysfs_mutex); -} - int __init sysfs_init(void) { int err = -ENOMEM; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d863..2ed2404f3113 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { * instance). */ struct sysfs_super_info { - const void *ns[KOBJ_NS_TYPES]; + void *ns[KOBJ_NS_TYPES]; }; #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) extern struct sysfs_dirent sysfs_root; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461fb..f66b065a8b5f 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -32,15 +32,17 @@ enum kobj_ns_type { /* * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace + * @grab_current_ns: return a new reference to calling task's namespace * @netlink_ns: return namespace to which a sock belongs (right?) * @initial_ns: return the initial namespace (i.e. init_net_ns) + * @drop_ns: drops a reference to namespace */ struct kobj_ns_type_operations { enum kobj_ns_type type; - const void *(*current_ns)(void); + void *(*grab_current_ns)(void); const void *(*netlink_ns)(struct sock *sk); const void *(*initial_ns)(void); + void (*drop_ns)(void *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -const void *kobj_ns_current(enum kobj_ns_type type); +void *kobj_ns_grab_current(enum kobj_ns_type type); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); +void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee0..e2696d76a599 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -/* Called to clear a ns tag when it is no longer valid */ -void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); - int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } -static inline void sysfs_exit_ns(int type, const void *tag) -{ -} - static inline int __must_check sysfs_init(void) { return 0; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2bf9ed9ef26b..aef430d779bd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -35,8 +35,11 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { + atomic_t passive; /* To decided when the network + * namespace should be freed. + */ atomic_t count; /* To decided when the network - * namespace should be freed. + * namespace should be shut down. */ #ifdef NETNS_REFCNT_DEBUG atomic_t use_count; /* To track references we @@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } + +extern void net_drop_ns(void *); + #else static inline struct net *get_net(struct net *net) @@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) { return 1; } + +#define net_drop_ns NULL #endif diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c2..640bd98a4c8a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) } -const void *kobj_ns_current(enum kobj_ns_type type) +void *kobj_ns_grab_current(enum kobj_ns_type type) { - const void *ns = NULL; + void *ns = NULL; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->current_ns(); + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; @@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } -/* - * kobj_ns_exit - invalidate a namespace tag - * - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) - * @ns: the actual namespace being invalidated - * - * This is called when a tag is no longer valid. For instance, - * when a network namespace exits, it uses this helper to - * make sure no sb's sysfs_info points to the now-invalidated - * netns. - */ -void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +void kobj_ns_drop(enum kobj_ns_type type, void *ns) { - sysfs_exit_ns(type, ns); + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) + kobj_ns_ops_tbl[type]->drop_ns(ns); + spin_unlock(&kobj_ns_type_lock); } - EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..cdcbc3cb00a9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -210,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); -- cgit v1.2.3 From 8f4e0a18682d91abfad72ede3d3cb5f3ebdf54b4 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 13 Jun 2011 09:06:57 +0200 Subject: IPVS netns exit causes crash in conntrack Quote from Patric Mc Hardy "This looks like nfnetlink.c excited and destroyed the nfnl socket, but ip_vs was still holding a reference to a conntrack. When the conntrack got destroyed it created a ctnetlink event, causing an oops in netlink_has_listeners when trying to use the destroyed nfnetlink socket." If nf_conntrack_netlink is loaded before ip_vs this is not a problem. This patch simply avoids calling ip_vs_conn_drop_conntrack() when netns is dying as suggested by Julian. Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++++- net/netfilter/ipvs/ip_vs_core.c | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index bf28ac2fc99b..782db275ac53 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) + if (cp->flags & IP_VS_CONN_F_NFCT) { ip_vs_conn_drop_conntrack(cp); + /* Do not access conntracks during subsys cleanup + * because nf_conntrack_find_get can not be used after + * conntrack cleanup for the net. + */ + smp_rmb(); + if (ipvs->enable) + ip_vs_conn_drop_conntrack(cp); + } ip_vs_pe_put(cp->pe); kfree(cp->pe_data); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 55af2242bccd..24c28d238dcb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1945,6 +1945,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); net_ipvs(net)->enable = 0; /* Disable packet reception */ + smp_wmb(); __ip_vs_sync_cleanup(net); LeaveFunction(2); } -- cgit v1.2.3 From b9cabe52c27cf834137f3aaa46da23bcf32284e8 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 12 Jun 2011 04:28:16 +0000 Subject: ieee802154: Don't leak memory in ieee802154_nl_fill_phy In net/ieee802154/nl-phy.c::ieee802154_nl_fill_phy() I see two small issues. 1) If the allocation of 'buf' fails we may just as well return -EMSGSIZE directly rather than jumping to 'out:' and do a pointless kfree(0). 2) We do not free 'buf' unless we jump to one of the error labels and this leaks memory. This patch should address both. Signed-off-by: Jesper Juhl Acked-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/nl-phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ed0eab39f531..02548b292b53 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -44,7 +44,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pr_debug("%s\n", __func__); if (!buf) - goto out; + return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); @@ -65,6 +65,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pages * sizeof(uint32_t), buf); mutex_unlock(&phy->pib_lock); + kfree(buf); return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3 From 63f6fe92c6b3cbf4c0bbbea4b31fdd3d68e21e4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 16 Jun 2011 17:16:37 +0200 Subject: netfilter: ip_tables: fix compile with debug Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 764743843503..24e556e83a3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct xt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, par->match->name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } -- cgit v1.2.3 From 58d5a0257d2fd89fbe4451f704193cc95b0a9c97 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:17 +0200 Subject: netfilter: ipt_ecn: fix protocol check in ecn_mt_check() Check for protocol inversion in ecn_mt_check() and remove the unnecessary runtime check for IPPROTO_TCP in ecn_mt(). Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index af6e9c778345..aaa85be1b2d8 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -76,8 +76,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return false; if (!match_tcp(skb, info, &par->hotdrop)) return false; } @@ -97,7 +95,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - ip->proto != IPPROTO_TCP) { + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return -EINVAL; } -- cgit v1.2.3 From db898aa2ef6529fa80891e754d353063611c77de Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:55 +0200 Subject: netfilter: ipt_ecn: fix inversion for IP header ECN match Userspace allows to specify inversion for IP header ECN matches, the kernel silently accepts it, but doesn't invert the match result. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index aaa85be1b2d8..2b57e52c746c 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); static inline bool match_ip(const struct sk_buff *skb, const struct ipt_ecn_info *einfo) { - return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; + return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & IPT_ECN_OP_MATCH_IP); } static inline bool match_tcp(const struct sk_buff *skb, -- cgit v1.2.3 From 2c38de4c1f8da799bdca0e4bb40ca13f2174d3e8 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 16 Jun 2011 17:27:04 +0200 Subject: netfilter: fix looped (broad|multi)cast's MAC handling By default, when broadcast or multicast packet are sent from a local application, they are sent to the interface then looped by the kernel to other local applications, going throught netfilter hooks in the process. These looped packet have their MAC header removed from the skb by the kernel looping code. This confuse various netfilter's netlink queue, netlink log and the legacy ip_queue, because they try to extract a hardware address from these packets, but extracts a part of the IP header instead. This patch prevent NFQUEUE, NFLOG and ip_QUEUE to include a MAC header if there is none in the packet. Signed-off-by: Nicolas Cavallari Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/nfnetlink_queue.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index f7f9bd7ba12d..5c9b9d963918 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 065fe405fb58..249394863284 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e0ee010935e7..2e7ccbb43ddb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); - if (indev && skb->dev) { + if (indev && skb->dev && + skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b83123f12b42..fdd2fafe0a14 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -335,7 +335,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev) { + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; int len = dev_parse_header(entskb, phw.hw_addr); if (len) { -- cgit v1.2.3 From 42c1edd345c8412d96e7a362ee06feb7be73bb6c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 16 Jun 2011 17:29:22 +0200 Subject: netfilter: nf_nat: avoid double seq_adjust for loopback Avoid double seq adjustment for loopback traffic because it causes silent repetition of TCP data. One example is passive FTP with DNAT rule and difference in the length of IP addresses. This patch adds check if packet is sent and received via loopback device. As the same conntrack is used both for outgoing and incoming direction, we restrict seq adjustment to happen only in POSTROUTING. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c7c42e7acc31..5d4f8e586e32 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -307,6 +307,12 @@ static inline int nf_ct_is_untracked(const struct nf_conn *ct) return test_bit(IPS_UNTRACKED_BIT, &ct->status); } +/* Packet is received from loopback */ +static inline bool nf_is_loopback_packet(const struct sk_buff *skb) +{ + return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; +} + extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index db10075dd88e..de9da21113a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); -- cgit v1.2.3 From 62b2bcb49cca72f6d3f39f831127a6ab315a475d Mon Sep 17 00:00:00 2001 From: Fernando Luis Vázquez Cao Date: Mon, 13 Jun 2011 15:04:43 +0000 Subject: IGMP snooping: set mrouters_only flag for IPv4 traffic properly Upon reception of a IGMP/IGMPv2 membership report the kernel sets the mrouters_only flag in a skb that may be a clone of the original skb, which means that sometimes the bridge loses track of membership report packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Tested-by: Hayato Kakuta Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2f14eafdeeab..a6d87c17bb03 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1424,7 +1424,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: -- cgit v1.2.3 From fc2af6c73fc9449cd5894a36bb76b8f8c0e49fd8 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vázquez Cao Date: Mon, 13 Jun 2011 15:06:58 +0000 Subject: IGMP snooping: set mrouters_only flag for IPv6 traffic properly Upon reception of a MGM report packet the kernel sets the mrouters_only flag in a skb that is a clone of the original skb, which means that the bridge loses track of MGM packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a6d87c17bb03..29b9812c8da0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1543,7 +1543,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } -- cgit v1.2.3 From a1b7f85e4f632f9cc342d8a34a3903feaf47a261 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Wed, 15 Jun 2011 12:38:25 +0000 Subject: caif: Bugfix - XOFF removed channel from caif-mux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XOFF was mixed up with DOWN indication, causing causing CAIF channel to be removed from mux and all incoming traffic to be lost after receiving flow-off. Fix this by replacing FLOW_OFF with DOWN notification. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfmuxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 3a66b8c10e09..c23979e79dfa 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || + if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && layer->id != 0) { -- cgit v1.2.3 From e3cb78c772de593afa720687ce3abbed8d93b0c3 Mon Sep 17 00:00:00 2001 From: Antoine Reversat Date: Thu, 16 Jun 2011 10:47:13 +0000 Subject: vlan: don't call ndo_vlan_rx_register on hardware that doesn't have vlan support This patch removes the call to ndo_vlan_rx_register if the underlying device doesn't have hardware support for VLAN. Signed-off-by: Antoine Reversat Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c7a581a96894..917ecb93ea28 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -205,7 +205,7 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register) + if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX)) ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } -- cgit v1.2.3 From 1eddceadb0d6441cd39b2c38705a8f5fec86e770 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 03:45:15 +0000 Subject: net: rfs: enable RFS before first data packet is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le jeudi 16 juin 2011 à 23:38 -0400, David Miller a écrit : > From: Ben Hutchings > Date: Fri, 17 Jun 2011 00:50:46 +0100 > > > On Wed, 2011-06-15 at 04:15 +0200, Eric Dumazet wrote: > >> @@ -1594,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) > >> goto discard; > >> > >> if (nsk != sk) { > >> + sock_rps_save_rxhash(nsk, skb->rxhash); > >> if (tcp_child_process(sk, nsk, skb)) { > >> rsk = nsk; > >> goto reset; > >> > > > > I haven't tried this, but it looks reasonable to me. > > > > What about IPv6? The logic in tcp_v6_do_rcv() looks very similar. > > Indeed ipv6 side needs the same fix. > > Eric please add that part and resubmit. And in fact I might stick > this into net-2.6 instead of net-next-2.6 > OK, here is the net-2.6 based one then, thanks ! [PATCH v2] net: rfs: enable RFS before first data packet is received First packet received on a passive tcp flow is not correctly RFS steered. One sock_rps_record_flow() call is missing in inet_accept() But before that, we also must record rxhash when child socket is setup. Signed-off-by: Eric Dumazet CC: Tom Herbert CC: Ben Hutchings CC: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c1926027a26..eae1f676f870 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -676,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..708dc203b034 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fd28711ba5..87551ca568cd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1644,6 +1644,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) -- cgit v1.2.3 From eeb1497277d6b1a0a34ed36b97e18f2bd7d6de0d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 16:25:39 -0400 Subject: inet_diag: fix inet_diag_bc_audit() A malicious user or buggy application can inject code and trigger an infinite loop in inet_diag_bc_audit() Also make sure each instruction is aligned on 4 bytes boundary, to avoid unaligned accesses. Reported-by: Dan Rosenberg Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6ffe94ca5bc9..3267d3898437 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } -- cgit v1.2.3 From 9aa3c94ce59066f545521033007abb6441706068 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2011 11:59:18 -0700 Subject: ipv4: fix multicast losses Knut Tidemann found that first packet of a multicast flow was not correctly received, and bisected the regression to commit b23dd4fe42b4 (Make output route lookup return rtable directly.) Special thanks to Knut, who provided a very nice bug report, including sample programs to demonstrate the bug. Reported-and-bisectedby: Knut Tidemann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 045f0ec6a4a0..aa13ef105110 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1902,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; -- cgit v1.2.3 From cefa9993f161c1c2b6b91b7ea2e84a9bfbd43d2e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 19 Jun 2011 16:13:01 -0700 Subject: netpoll: copy dev name of slaves to struct netpoll Otherwise we will not see the name of the slave dev in error message: [ 388.469446] (null): doesn't support polling, aborting. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + net/bridge/br_device.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 652b30e525d0..eafe44a528ac 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1297,6 +1297,7 @@ static inline int slave_enable_netpoll(struct slave *slave) goto out; np->dev = slave->dev; + strlcpy(np->dev_name, slave->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { kfree(np); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6b2f86378c7..c188c803c09c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -243,6 +243,7 @@ int br_netpoll_enable(struct net_bridge_port *p) goto out; np->dev = p->dev; + strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { -- cgit v1.2.3 From 8ad2475e3555346fbd738e77da12578b97d10505 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 19 Jun 2011 22:31:20 +0000 Subject: ipv4, ping: Remove duplicate icmp.h include Remove the duplicate inclusion of net/icmp.h from net/ipv4/ping.c Signed-off-by: Jesper Juhl Signed-off-by: David S. Miller --- net/ipv4/ping.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9aaa67165f42..39b403f854c6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3