From b9a2f2e450b0f770bb4347ae8d48eb2dea701e24 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 May 2008 10:48:59 -0700
Subject: netlink: Fix nla_parse_nested_compat() to call nla_parse() directly

The purpose of nla_parse_nested_compat() is to parse attributes which
contain a struct followed by a stream of nested attributes.  So far,
it called nla_parse_nested() to parse the stream of nested attributes
which was wrong, as nla_parse_nested() expects a container attribute
as data which holds the attribute stream.  It needs to call
nla_parse() directly while pointing at the next possible alignment
point after the struct in the beginning of the attribute.

With this patch, I can no longer reproduce the reported leftover
warnings.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index a5506c42f03c..112dcdf7e34e 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -772,12 +772,13 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
 					    const struct nla_policy *policy,
 					    int len)
 {
-	if (nla_len(nla) < len)
+	int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+	if (nested_len < 0)
 		return -1;
-	if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
-		return nla_parse_nested(tb, maxtype,
-					nla_data(nla) + NLA_ALIGN(len),
-					policy);
+	if (nested_len >= nla_attr_size(0))
+		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+				 nested_len, policy);
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 	return 0;
 }
-- 
cgit v1.2.3


From bc3ed28caaef55e7e3a9316464256353c5f9b1df Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:36:54 -0700
Subject: netlink: Improve returned error codes

Make nlmsg_trim(), nlmsg_cancel(), genlmsg_cancel(), and
nla_nest_cancel() void functions.

Return -EMSGSIZE instead of -1 if the provided message buffer is not
big enough.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h |  4 ++--
 include/net/netlink.h   | 20 +++++++++-----------
 net/core/neighbour.c    |  3 ++-
 net/core/rtnetlink.c    |  3 ++-
 net/netlink/attr.c      | 12 ++++++------
 net/netlink/genetlink.c |  6 ++++--
 net/sched/sch_dsmark.c  |  6 ++++--
 net/sched/sch_gred.c    |  3 ++-
 net/sched/sch_hfsc.c    |  2 +-
 net/sched/sch_red.c     |  3 ++-
 net/wireless/nl80211.c  | 12 ++++++++----
 11 files changed, 42 insertions(+), 32 deletions(-)

(limited to 'include/net')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decdda546829..747c255d1df0 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -162,9 +162,9 @@ static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
  * @skb: socket buffer the message is stored in
  * @hdr: generic netlink message header
  */
-static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
+static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
 {
-	return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+	nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
 }
 
 /**
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 112dcdf7e34e..dfc3701dfcc3 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -556,14 +556,12 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb)
  * @skb: socket buffer the message is stored in
  * @mark: mark to trim to
  *
- * Trims the message to the provided mark. Returns -1.
+ * Trims the message to the provided mark.
  */
-static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
+static inline void nlmsg_trim(struct sk_buff *skb, const void *mark)
 {
 	if (mark)
 		skb_trim(skb, (unsigned char *) mark - skb->data);
-
-	return -1;
 }
 
 /**
@@ -572,11 +570,11 @@ static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
  * @nlh: netlink message header
  *
  * Removes the complete netlink message including all
- * attributes from the socket buffer again. Returns -1.
+ * attributes from the socket buffer again.
  */
-static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return nlmsg_trim(skb, nlh);
+	nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -775,7 +773,7 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
 
 	if (nested_len < 0)
-		return -1;
+		return -EINVAL;
 	if (nested_len >= nla_attr_size(0))
 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 				 nested_len, policy);
@@ -1080,11 +1078,11 @@ static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
  * @start: container attribute
  *
  * Removes the container attribute and including all nested
- * attributes. Returns -1.
+ * attributes. Returns -EMSGSIZE
  */
-static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
+static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	return nlmsg_trim(skb, start);
+	nlmsg_trim(skb, start);
 }
 
 /**
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3896de79dfbf..65f01f71b3f3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, nest);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf857c4dc7b1..a9a77216310e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 	return nla_nest_end(skb, mx);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, mx);
+	nla_nest_cancel(skb, mx);
+	return -EMSGSIZE;
 }
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index feb326f4a752..47bbf45ae5d7 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute header and payload.
  */
 int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	__nla_put(skb, attrtype, attrlen, data);
 	return 0;
@@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute payload.
  */
 int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	__nla_put_nohdr(skb, attrlen, data);
 	return 0;
@@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute payload.
  */
 int nla_append(struct sk_buff *skb, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	memcpy(skb_put(skb, attrlen), data, attrlen);
 	return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d16929c9b4bc..f5aa23c3e886 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -554,7 +554,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 	return genlmsg_end(skb, hdr);
 
 nla_put_failure:
-	return genlmsg_cancel(skb, hdr);
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
 }
 
 static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
@@ -590,7 +591,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
 	return genlmsg_end(skb, hdr);
 
 nla_put_failure:
-	return genlmsg_cancel(skb, hdr);
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
 }
 
 static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0df911fd67b1..64465bacbe79 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static const struct Qdisc_class_ops dsmark_class_ops = {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3a9d226ff1e4..c89fba56db56 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -582,7 +582,8 @@ append_opt:
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static void gred_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 87293d0db1d7..fdfaa3fcc16d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 
  nla_put_failure:
 	nla_nest_cancel(skb, nest);
-	return -1;
+	return -EMSGSIZE;
 }
 
 static int
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3dcd493f4f4a..5c569853b9c0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bdd4dddc0e1..fb75f265b39c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
@@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_station(struct sk_buff *skb,
@@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_mpath(struct sk_buff *skb,
-- 
cgit v1.2.3


From e51171019bb0e1f9fb57c25bd2e38ce652eaea27 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 29 May 2008 19:55:05 +0900
Subject: [SCTP]: Fix NULL dereference of asoc.

Commit 7cbca67c073263c179f605bdbbdc565ab29d801d ("[IPV6]: Support
Source Address Selection API (RFC5014)") introduced NULL dereference
of asoc to sctp_v6_get_saddr in net/sctp/ipv6.c.
Pointed out by Johann Felix Soden <johfel@users.sourceforge.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/sctp/structs.h | 3 ++-
 net/sctp/ipv6.c            | 5 +++--
 net/sctp/protocol.c        | 3 ++-
 net/sctp/transport.c       | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0ce0443c5b79..917d425f0542 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -548,7 +548,8 @@ struct sctp_af {
 	struct dst_entry *(*get_dst)	(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
-	void		(*get_saddr)	(struct sctp_association *asoc,
+	void		(*get_saddr)	(struct sctp_sock *sk,
+					 struct sctp_association *asoc,
 					 struct dst_entry *dst,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c60635..e4aac3266fcd 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 /* Fills in the source address(saddr) based on the destination address(daddr)
  * and asoc's bind address list.
  */
-static void sctp_v6_get_saddr(struct sctp_association *asoc,
+static void sctp_v6_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	if (!asoc) {
 		ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
 				   &daddr->v6.sin6_addr,
-				   inet6_sk(asoc->base.sk)->srcprefs,
+				   inet6_sk(&sk->inet.sk)->srcprefs,
 				   &saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
 				  NIP6(saddr->v6.sin6_addr));
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c2..13ee7fa92e07 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -519,7 +519,8 @@ out:
 /* For v4, the source address is cached in the route entry(dst). So no need
  * to cache it separately and hence this is an empty routine.
  */
-static void sctp_v4_get_saddr(struct sctp_association *asoc,
+static void sctp_v4_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5abe..62082e7b7972 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -291,7 +291,7 @@ void sctp_transport_route(struct sctp_transport *transport,
 	if (saddr)
 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
 	else
-		af->get_saddr(asoc, dst, daddr, &transport->saddr);
+		af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
 
 	transport->dst = dst;
 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
-- 
cgit v1.2.3


From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 27 May 2008 17:37:49 +0900
Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs.

- Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs
  by using unsigned long.
- Shadow this arithmetic overflow workaround by introducing
  helper functions: addrconf_timeout_fixup() and
  addrconf_finite_timeout().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/addrconf.h | 22 ++++++++++++
 net/ipv6/addrconf.c    | 97 ++++++++++++++++++++++++++------------------------
 net/ipv6/route.c       | 12 ++-----
 3 files changed, 75 insertions(+), 56 deletions(-)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0a2f0372df31..bbd3d583c6e6 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -94,6 +94,28 @@ extern void			addrconf_join_solict(struct net_device *dev,
 extern void			addrconf_leave_solict(struct inet6_dev *idev,
 					struct in6_addr *addr);
 
+static inline unsigned long addrconf_timeout_fixup(u32 timeout,
+						    unsigned unit)
+{
+	if (timeout == 0xffffffff)
+		return ~0UL;
+
+	/*
+	 * Avoid arithmetic overflow.
+	 * Assuming unit is constant and non-zero, this "if" statement
+	 * will go away on 64bit archs.
+	 */
+	if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit)
+		return LONG_MAX / unit;
+
+	return timeout;
+}
+
+static inline int addrconf_finite_timeout(unsigned long timeout)
+{
+	return ~timeout;
+}
+
 /*
  *	IPv6 Address Label subsystem (addrlabel.c)
  */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c3b20c5afa3e..147588f4c7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 						onlink = -1;
 
 					spin_lock(&ifa->lock);
-					lifetime = min_t(unsigned long,
-							 ifa->valid_lft, 0x7fffffffUL/HZ);
+
+					lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+					/*
+					 * Note: Because this address is
+					 * not permanent, lifetime <
+					 * LONG_MAX / HZ here.
+					 */
 					if (time_before(expires,
 							ifa->tstamp + lifetime * HZ))
 						expires = ifa->tstamp + lifetime * HZ;
@@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
-	unsigned long rt_expires;
 	struct inet6_dev *in6_dev;
 
 	pinfo = (struct prefix_info *) opt;
@@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	if (valid_lft == INFINITY_LIFE_TIME)
-		rt_expires = ~0UL;
-	else if (valid_lft >= 0x7FFFFFFF/HZ) {
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		unsigned long rt_expires;
+
 		/* Avoid arithmetic overflow. Really, we could
 		 * save rt_expires in seconds, likely valid_lft,
 		 * but it would require division in fib gc, that it
 		 * not good.
 		 */
-		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-	} else
-		rt_expires = valid_lft * HZ;
+		if (HZ > USER_HZ)
+			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+		else
+			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
 
-	/*
-	 * We convert this (in jiffies) to clock_t later.
-	 * Avoid arithmetic overflow there as well.
-	 * Overflow can happen only if HZ < USER_HZ.
-	 */
-	if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
-		rt_expires = 0x7FFFFFFF / USER_HZ;
+		if (addrconf_finite_timeout(rt_expires))
+			rt_expires *= HZ;
 
-	if (pinfo->onlink) {
-		struct rt6_info *rt;
 		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
@@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 			if (valid_lft == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
-			} else if (~rt_expires) {
+			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				rt->rt6i_expires = jiffies + rt_expires;
 				rt->rt6i_flags |= RTF_EXPIRES;
@@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 				rt->rt6i_expires = 0;
 			}
 		} else if (valid_lft) {
-			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
 			clock_t expires = 0;
-			if (~rt_expires) {
+			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+			if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				flags |= RTF_EXPIRES;
 				expires = jiffies_to_clock_t(rt_expires);
@@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	int scope;
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	ASSERT_RTNL();
 
@@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 
 	scope = ipv6_addr_scope(pfx);
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
@@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 {
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	spin_lock_bh(&ifp->lock);
 	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48534c6c0735..220cffe9e63b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct route_info *rinfo = (struct route_info *) opt;
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
-	u32 lifetime;
+	unsigned long lifetime;
 	struct rt6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
@@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 
-	lifetime = ntohl(rinfo->lifetime);
-	if (lifetime == 0xffffffff) {
-		/* infinity */
-	} else if (lifetime > 0x7fffffff/HZ - 1) {
-		/* Avoid arithmetic overflow */
-		lifetime = 0x7fffffff/HZ - 1;
-	}
+	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 
 	if (rinfo->length == 3)
 		prefix = (struct in6_addr *)rinfo->prefix;
@@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 	if (rt) {
-		if (lifetime == 0xffffffff) {
+		if (!addrconf_finite_timeout(lifetime)) {
 			rt->rt6i_flags &= ~RTF_EXPIRES;
 		} else {
 			rt->rt6i_expires = jiffies + HZ * lifetime;
-- 
cgit v1.2.3


From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 4 Jun 2008 13:02:49 +0900
Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/transp_v6.h  | 3 ++-
 net/ipv6/datagram.c      | 7 ++++---
 net/ipv6/ip6_flowlabel.c | 2 +-
 net/ipv6/ipv6_sockglue.c | 2 +-
 net/ipv6/raw.c           | 2 +-
 net/ipv6/udp.c           | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 27394e0447d8..112934a3288d 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -40,7 +40,8 @@ extern int			datagram_recv_ctl(struct sock *sk,
 						  struct msghdr *msg,
 						  struct sk_buff *skb);
 
-extern int			datagram_send_ctl(struct msghdr *msg,
+extern int			datagram_send_ctl(struct net *net,
+						  struct msghdr *msg,
 						  struct flowi *fl,
 						  struct ipv6_txoptions *opt,
 						  int *hlimit, int *tclass);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 53e3883f7666..b9c2de84a8a2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+int datagram_send_ctl(struct net *net,
+		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
 		      int *hlimit, int *tclass)
 {
@@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
 			if (fl->oif) {
-				dev = dev_get_by_index(&init_net, fl->oif);
+				dev = dev_get_by_index(net, fl->oif);
 				if (!dev)
 					return -ENODEV;
 			} else if (addr_type & IPV6_ADDR_LINKLOCAL)
@@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-				if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
+				if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
 						   strict ? dev : NULL, 0))
 					err = -EINVAL;
 				else
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eb7a940310f4..37a4e777e347 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9293b9f0ac23..3eef8e5b3636 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -416,7 +416,7 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 232e0dc45bf5..603df76e0522 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 47123bf5eb0f..1b35c4722004 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -731,7 +731,7 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
-- 
cgit v1.2.3


From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 4 Jun 2008 15:49:07 +0400
Subject: [IPV6]: inet_sk(sk)->cork.opt leak

IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data
actually. In this case ip_flush_pending_frames should be called instead
of ip6_flush_pending_frames.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/udp.h | 1 +
 net/ipv4/udp.c    | 3 ++-
 net/ipv6/udp.c    | 4 +++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 3e55a99b0ba3..ccce83707046 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -135,6 +135,7 @@ extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
+extern void	udp_flush_pending_frames(struct sock *sk);
 
 extern int	udp_rcv(struct sk_buff *skb);
 extern int	udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
  * Throw away all pending data and cancel the corking. Socket is locked.
  */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
 		ip_flush_pending_frames(sk);
 	}
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
  * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1b35c4722004..dd309626ae9a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
-	if (up->pending) {
+	if (up->pending == AF_INET)
+		udp_flush_pending_frames(sk);
+	else if (up->pending) {
 		up->len = 0;
 		up->pending = 0;
 		ip6_flush_pending_frames(sk);
-- 
cgit v1.2.3


From a6465234814efda9ed1dccdba852953f7508e827 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:38:43 -0700
Subject: sctp: Correctly implement Fast Recovery cwnd manipulations.

Correctly keep track of Fast Recovery state and do not reduce
congestion window multiple times during sucht state.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  8 +++++++-
 net/sctp/transport.c       | 44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 917d425f0542..67592072a32e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -902,7 +902,10 @@ struct sctp_transport {
 	 *		calculation completes (i.e. the DATA chunk
 	 *		is SACK'd) clear this flag.
 	 */
-	int rto_pending;
+	__u8 rto_pending;
+
+	/* Flag to track the current fast recovery state */
+	__u8 fast_recovery;
 
 	/*
 	 * These are the congestion stats.
@@ -921,6 +924,9 @@ struct sctp_transport {
 	/* Data that has been sent, but not acknowledged. */
 	__u32 flight_size;
 
+	/* TSN marking the fast recovery exit point */
+	__u32 fast_recovery_exit;
+
 	/* Destination */
 	struct dst_entry *dst;
 	/* Source address. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 62082e7b7972..9647fb277221 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	peer->rttvar = 0;
 	peer->srtt = 0;
 	peer->rto_pending = 0;
+	peer->fast_recovery = 0;
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_used = jiffies;
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	cwnd = transport->cwnd;
 	flight_size = transport->flight_size;
 
+	/* See if we need to exit Fast Recovery first */
+	if (transport->fast_recovery &&
+	    TSN_lte(transport->fast_recovery_exit, sack_ctsn))
+		transport->fast_recovery = 0;
+
 	/* The appropriate cwnd increase algorithm is performed if, and only
-	 * if the cumulative TSN has advanced and the congestion window is
+	 * if the cumulative TSN whould advanced and the congestion window is
 	 * being fully utilized.
 	 */
-	if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
+	if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
 	    (flight_size < cwnd))
 		return;
 
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	pmtu = transport->asoc->pathmtu;
 
 	if (cwnd <= ssthresh) {
-		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
-		 * than or equal to ssthresh an SCTP endpoint MUST use the
-		 * slow start algorithm to increase cwnd only if the current
-		 * congestion window is being fully utilized and an incoming
-		 * SACK advances the Cumulative TSN Ack Point. Only when these
-		 * two conditions are met can the cwnd be increased otherwise
-		 * the cwnd MUST not be increased. If these conditions are met
-		 * then cwnd MUST be increased by at most the lesser of
-		 * 1) the total size of the previously outstanding DATA
-		 * chunk(s) acknowledged, and 2) the destination's path MTU.
+		/* RFC 4960 7.2.1
+		 * o  When cwnd is less than or equal to ssthresh, an SCTP
+		 *    endpoint MUST use the slow-start algorithm to increase
+		 *    cwnd only if the current congestion window is being fully
+		 *    utilized, an incoming SACK advances the Cumulative TSN
+		 *    Ack Point, and the data sender is not in Fast Recovery.
+		 *    Only when these three conditions are met can the cwnd be
+		 *    increased; otherwise, the cwnd MUST not be increased.
+		 *    If these conditions are met, then cwnd MUST be increased
+		 *    by, at most, the lesser of 1) the total size of the
+		 *    previously outstanding DATA chunk(s) acknowledged, and
+		 *    2) the destination's path MTU.  This upper bound protects
+		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
 		 */
+		if (transport->fast_recovery)
+			return;
+
 		if (bytes_acked > pmtu)
 			cwnd += pmtu;
 		else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      cwnd = ssthresh
 		 *      partial_bytes_acked = 0
 		 */
+		if (transport->fast_recovery)
+			return;
+
+		/* Mark Fast recovery */
+		transport->fast_recovery = 1;
+		transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+
 		transport->ssthresh = max(transport->cwnd/2,
 					  4*transport->asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->flight_size = 0;
 	t->error_count = 0;
 	t->rto_pending = 0;
+	t->fast_recovery = 0;
 
 	/* Initialize the state information for SFR-CACC */
 	t->cacc.changeover_active = 0;
-- 
cgit v1.2.3


From 62aeaff5ccd96462b7077046357a6d7886175a57 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:39:11 -0700
Subject: sctp: Start T3-RTX timer when fast retransmitting lowest TSN

When we are trying to fast retransmit the lowest outstanding TSN, we
need to restart the T3-RTX timer, so that subsequent timeouts will
correctly tag all the packets necessary for retransmissions.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  5 ++++-
 net/sctp/outqueue.c        | 42 +++++++++++++++++++++++++++++++-----------
 net/sctp/transport.c       |  4 ++--
 3 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 67592072a32e..714dc43c0345 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1051,7 +1051,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
 void sctp_transport_pmtu(struct sctp_transport *);
 void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *);
+void sctp_transport_reset_timers(struct sctp_transport *, int);
 void sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1141,6 +1141,9 @@ struct sctp_outq {
 	/* How many unackd bytes do we have in-flight?	*/
 	__u32 outstanding_bytes;
 
+	/* Are we doing fast-rtx on this queue */
+	char fast_rtx;
+
 	/* Corked? */
 	char cork;
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19c..5d3c441e84d3 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
 
+	q->fast_rtx = 0;
 	q->outstanding_bytes = 0;
 	q->empty = 1;
 	q->cork  = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 	case SCTP_RTXR_FAST_RTX:
 		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
+		q->fast_rtx = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
 		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -543,10 +545,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
 	struct sctp_association *asoc;
+	int fast_rtx;
 	int error = 0;
+	int timer = 0;
 
 	asoc = q->asoc;
 	lqueue = &q->retransmit;
+	fast_rtx = q->fast_rtx;
 
 	/* RFC 2960 6.3.3 Handle T3-rtx Expiration
 	 *
@@ -587,13 +592,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		switch (status) {
 		case SCTP_XMIT_PMTU_FULL:
 			/* Send this packet.  */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* If we are retransmitting, we should only
 			 * send a single packet.
 			 */
-			if (rtx_timeout) {
+			if (rtx_timeout || fast_rtx) {
 				list_add(lchunk, lqueue);
 				lchunk = NULL;
 			}
@@ -603,8 +607,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_RWND_FULL:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA as there is no more room
 			 * at the receiver.
@@ -615,8 +618,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_NAGLE_DELAY:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA because of nagle delay. */
 			list_add(lchunk, lqueue);
@@ -635,7 +637,14 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			if (chunk->fast_retransmit > 0)
 				chunk->fast_retransmit = -1;
 
-			*start_timer = 1;
+			/* Force start T3-rtx timer when fast retransmitting
+			 * the earliest outstanding TSN
+			 */
+			if (!timer && fast_rtx &&
+			    ntohl(chunk->subh.data_hdr->tsn) ==
+					     asoc->ctsn_ack_point + 1)
+				timer = 2;
+
 			q->empty = 0;
 
 			/* Retrieve a new chunk to bundle. */
@@ -643,12 +652,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			break;
 		}
 
+		/* Set the timer if there were no errors */
+		if (!error && !timer)
+			timer = 1;
+
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
 		 * queue that could not fit in the PMTU sized packet, they need
 		 * to be marked as ineligible for a subsequent fast retransmit.
 		 */
-		if (rtx_timeout && !lchunk) {
+		if (rtx_timeout && fast_rtx) {
 			list_for_each_entry(chunk1, lqueue, transmitted_list) {
 				if (chunk1->fast_retransmit > 0)
 					chunk1->fast_retransmit = -1;
@@ -656,6 +669,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		}
 	}
 
+	*start_timer = timer;
+
+	/* Clear fast retransmit hint */
+	if (fast_rtx)
+		q->fast_rtx = 0;
+
 	return error;
 }
 
@@ -862,7 +881,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 						    rtx_timeout, &start_timer);
 
 			if (start_timer)
-				sctp_transport_reset_timers(transport);
+				sctp_transport_reset_timers(transport,
+							    start_timer-1);
 
 			/* This can happen on COOKIE-ECHO resend.  Only
 			 * one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +997,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport);
+			sctp_transport_reset_timers(transport, start_timer-1);
 
 			q->empty = 0;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 9647fb277221..3f34f61221ec 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -191,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 /* Start T3_rtx timer if it is not already running and update the heartbeat
  * timer.  This routine is called every time a DATA chunk is sent.
  */
-void sctp_transport_reset_timers(struct sctp_transport *transport)
+void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
 {
 	/* RFC 2960 6.3.2 Retransmission Timer Rules
 	 *
@@ -201,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
 	 * address.
 	 */
 
-	if (!timer_pending(&transport->T3_rtx_timer))
+	if (force || !timer_pending(&transport->T3_rtx_timer))
 		if (!mod_timer(&transport->T3_rtx_timer,
 			       jiffies + transport->rto))
 			sctp_transport_hold(transport);
-- 
cgit v1.2.3


From b9031d9d87b24e24cd32ea15b5f4220a1e8da909 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:40:15 -0700
Subject: sctp: Fix ECN markings for IPv6

Commit e9df2e8fd8fbc95c57dbd1d33dada66c4627b44c ("[IPV6]: Use
appropriate sock tclass setting for routing lookup.") also changed the
way that ECN capable transports mark this capability in IPv6.  As a
result, SCTP was not marking ECN capablity because the traffic class
was never set.  This patch brings back the markings for IPv6 traffic.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 net/sctp/ipv6.c            | 6 ++++++
 net/sctp/output.c          | 2 +-
 net/sctp/protocol.c        | 6 ++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 714dc43c0345..7f25195f9855 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -588,6 +588,7 @@ struct sctp_af {
 	int		(*is_ce)	(const struct sk_buff *sk);
 	void		(*seq_dump_addr)(struct seq_file *seq,
 					 union sctp_addr *addr);
+	void		(*ecn_capable)(struct sock *sk);
 	__u16		net_header_len;
 	int		sockaddr_len;
 	sa_family_t	sa_family;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e4aac3266fcd..a2f4d4d51593 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -727,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
 }
 
+static void sctp_v6_ecn_capable(struct sock *sk)
+{
+	inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+}
+
 /* Initialize a PF_INET6 socket msg_name. */
 static void sctp_inet6_msgname(char *msgname, int *addr_len)
 {
@@ -997,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
 	.skb_iif	   = sctp_v6_skb_iif,
 	.is_ce		   = sctp_v6_is_ce,
 	.seq_dump_addr	   = sctp_v6_seq_dump_addr,
+	.ecn_capable	   = sctp_v6_ecn_capable,
 	.net_header_len	   = sizeof(struct ipv6hdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
 #ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819d..6d45bae93b46 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * Note: The works for IPv6 layer checks this bit too later
 	 * in transmission.  See IP6_ECN_flow_xmit().
 	 */
-	INET_ECN_xmit(nskb->sk);
+	(*tp->af_specific->ecn_capable)(nskb->sk);
 
 	/* Set up the IP options.  */
 	/* BUG: not implemented
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 56bdaf7fc425..b435a193c5df 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -617,6 +617,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
 }
 
+static void sctp_v4_ecn_capable(struct sock *sk)
+{
+	INET_ECN_xmit(sk);
+}
+
 /* Event handler for inet address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -935,6 +940,7 @@ static struct sctp_af sctp_af_inet = {
 	.skb_iif	   = sctp_v4_skb_iif,
 	.is_ce		   = sctp_v4_is_ce,
 	.seq_dump_addr	   = sctp_v4_seq_dump_addr,
+	.ecn_capable	   = sctp_v4_ecn_capable,
 	.net_header_len	   = sizeof(struct iphdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in),
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From 45d465bc237ab1e1ebb4c65b9b318830dafb7509 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 10 Jun 2008 12:37:42 -0700
Subject: ipv4: Remove unused declaration from include/net/tcp.h.

- The tcp_unhash() method in /include/net/tcp.h is no more needed, as the
unhash method in tcp_prot structure is now inet_unhash (instead of
tcp_unhash in the
past); see tcp_prot structure in net/ipv4/tcp_ipv4.c.

- So, this patch removes tcp_unhash() declaration from include/net/tcp.h

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 633147cb6bbc..d448310c82c1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -433,7 +433,6 @@ extern struct sk_buff *		tcp_make_synack(struct sock *sk,
 
 extern int			tcp_disconnect(struct sock *sk, int flags);
 
-extern void			tcp_unhash(struct sock *sk);
 
 /* From syncookies.c */
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-- 
cgit v1.2.3


From ce4a7d0d48bbaed78ccbb0bafb9229651a40303a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Jun 2008 12:39:35 -0700
Subject: inet{6}_request_sock: Init ->opt and ->pktopts in the constructor

Wei Yongjun noticed that we may call reqsk_free on request sock objects where
the opt fields may not be initialized, fix it by introducing inet_reqsk_alloc
where we initialize ->opt to NULL and set ->pktopts to NULL in
inet6_reqsk_alloc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h    |  4 +++-
 include/net/inet_sock.h | 10 ++++++++++
 net/dccp/ipv4.c         |  3 +--
 net/dccp/ipv6.c         |  1 -
 net/ipv4/syncookies.c   |  3 +--
 net/ipv4/tcp_ipv4.c     |  2 +-
 net/ipv6/syncookies.c   |  1 -
 net/ipv6/tcp_ipv6.c     |  1 -
 8 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 10b666b61add..cde056e08181 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -396,8 +396,10 @@ static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *op
 {
 	struct request_sock *req = reqsk_alloc(ops);
 
-	if (req != NULL)
+	if (req != NULL) {
 		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
+		inet6_rsk(req)->pktopts = NULL;
+	}
 
 	return req;
 }
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a42cd63d241a..9fabe5b38912 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -197,4 +197,14 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return skb->rtable->rt_iif;
 }
 
+static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
+{
+	struct request_sock *req = reqsk_alloc(ops);
+
+	if (req != NULL)
+		inet_rsk(req)->opt = NULL;
+
+	return req;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index c22a3780c14e..37d27bcb361f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = ip_hdr(skb)->daddr;
 	ireq->rmt_addr = ip_hdr(skb)->saddr;
-	ireq->opt	= NULL;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9b1129bb7ece..f7fe2a572d7b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq6 = inet6_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
-	ireq6->pktopts	= NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		cookie_check_timestamp(&tcp_opt);
 
 	ret = NULL;
-	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->opt		= NULL;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..4f8485c67d1a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
 		goto drop;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 938ce4ecde55..3ecc1157994e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
-	ireq6->pktopts = NULL;
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac0..cb46749d4c32 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	treq->pktopts = NULL;
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
-- 
cgit v1.2.3


From ec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jun 2008 16:31:35 -0700
Subject: tcp: Revert 'process defer accept as established' changes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87
("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and
the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38
("tcp: Fix slab corruption with ipv6 and tcp6fuzz").

This change causes several problems, first reported by Ingo Molnar
as a distcc-over-loopback regression where connections were getting
stuck.

Ilpo Järvinen first spotted the locking problems.  The new function
added by this code, tcp_defer_accept_check(), only has the
child socket locked, yet it is modifying state of the parent
listening socket.

Fixing that is non-trivial at best, because we can't simply just grab
the parent listening socket lock at this point, because it would
create an ABBA deadlock.  The normal ordering is parent listening
socket --> child socket, but this code path would require the
reverse lock ordering.

Next is a problem noticed by Vitaliy Gusev, he noted:

----------------------------------------
>--- a/net/ipv4/tcp_timer.c
>+++ b/net/ipv4/tcp_timer.c
>@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
> 		goto death;
> 	}
>
>+	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
>+		tcp_send_active_reset(sk, GFP_ATOMIC);
>+		goto death;

Here socket sk is not attached to listening socket's request queue. tcp_done()
will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should
release this sk) as socket is not DEAD. Therefore socket sk will be lost for
freeing.
----------------------------------------

Finally, Alexey Kuznetsov argues that there might not even be any
real value or advantage to these new semantics even if we fix all
of the bugs:

----------------------------------------
Hiding from accept() sockets with only out-of-order data only
is the only thing which is impossible with old approach. Is this really
so valuable? My opinion: no, this is nothing but a new loophole
to consume memory without control.
----------------------------------------

So revert this thing for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h             |  7 -------
 include/net/request_sock.h      |  4 ++--
 include/net/tcp.h               |  1 -
 net/ipv4/inet_connection_sock.c | 11 +++++++---
 net/ipv4/tcp.c                  | 18 ++++++++++-------
 net/ipv4/tcp_input.c            | 45 -----------------------------------------
 net/ipv4/tcp_ipv4.c             |  8 --------
 net/ipv4/tcp_minisocks.c        | 32 +++++++++++------------------
 net/ipv4/tcp_timer.c            |  5 -----
 9 files changed, 33 insertions(+), 98 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 18e62e3d406f..b31b6b74aa28 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,11 +239,6 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 	return (struct tcp_request_sock *)req;
 }
 
-struct tcp_deferred_accept_info {
-	struct sock *listen_sk;
-	struct request_sock *request;
-};
-
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
@@ -379,8 +374,6 @@ struct tcp_sock {
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 	int			linger2;
 
-	struct tcp_deferred_accept_info defer_tcp_accept;
-
 	unsigned long last_synq_overflow; 
 
 	u32	tso_deferred;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b220b5f624de..0c96e7bed5db 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -115,8 +115,8 @@ struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
-	u16			rskq_defer_accept;
-	/* 2 bytes hole, try to pack */
+	u8			rskq_defer_accept;
+	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d448310c82c1..cf54034019d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,7 +139,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_KEEPINTVL	32767
 #define MAX_TCP_KEEPCNT		127
 #define MAX_TCP_SYNCNT		127
-#define MAX_TCP_ACCEPT_DEFERRED 65535
 
 #define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..045e799d3e1d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
+				    (inet_rsk(req)->acked ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req))) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b8043..fc54a48fde1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		if (val < 0) {
-			err = -EINVAL;
-		} else {
-			if (val > MAX_TCP_ACCEPT_DEFERRED)
-				val = MAX_TCP_ACCEPT_DEFERRED;
-			icsk->icsk_accept_queue.rskq_defer_accept = val;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2299,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = icsk->icsk_accept_queue.rskq_defer_accept;
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b560..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
-static int tcp_defer_accept_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (tp->defer_tcp_accept.request) {
-		int queued_data =  tp->rcv_nxt - tp->copied_seq;
-		int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ?
-			tcp_hdr((struct sk_buff *)
-				sk->sk_receive_queue.prev)->fin : 0;
-
-		if (queued_data && hasfin)
-			queued_data--;
-
-		if (queued_data &&
-		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-			if (sock_flag(sk, SOCK_KEEPOPEN)) {
-				inet_csk_reset_keepalive_timer(sk,
-							       keepalive_time_when(tp));
-			} else {
-				inet_csk_delete_keepalive_timer(sk);
-			}
-
-			inet_csk_reqsk_queue_add(
-				tp->defer_tcp_accept.listen_sk,
-				tp->defer_tcp_accept.request,
-				sk);
-
-			tp->defer_tcp_accept.listen_sk->sk_data_ready(
-				tp->defer_tcp_accept.listen_sk, 0);
-
-			sock_put(tp->defer_tcp_accept.listen_sk);
-			sock_put(sk);
-			tp->defer_tcp_accept.listen_sk = NULL;
-			tp->defer_tcp_accept.request = NULL;
-		} else if (hasfin ||
-			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-			tcp_reset(sk);
-			return -1;
-		}
-	}
-	return 0;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4944,8 +4901,6 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-
-	tcp_defer_accept_check(sk);
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f8485c67d1a..97a230026e13 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	if (tp->defer_tcp_accept.request) {
-		reqsk_free(tp->defer_tcp_accept.request);
-		sock_put(tp->defer_tcp_accept.listen_sk);
-		sock_put(sk);
-		tp->defer_tcp_accept.listen_sk = NULL;
-		tp->defer_tcp_accept.request = NULL;
-	}
-
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   Both ends (listening sockets) accept the new incoming
-	   connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+	   bare ACK.  Otherwise, we create an established connection.  Both
+	   ends (listening sockets) accept the new incoming connection and try
+	   to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		if (!(flg & TCP_FLAG_ACK))
 			return NULL;
 
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+			inet_rsk(req)->acked = 1;
+			return NULL;
+		}
+
 		/* OK, ACK is valid, create big socket and
 		 * feed this segment to it. It will repeat all
 		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		inet_csk_reqsk_queue_unlink(sk, req, prev);
 		inet_csk_reqsk_queue_removed(sk, req);
 
-		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
-			/* the accept queue handling is done is est recv slow
-			 * path so lets make sure to start there
-			 */
-			tcp_sk(child)->pred_flags = 0;
-			sock_hold(sk);
-			sock_hold(child);
-			tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-			tcp_sk(child)->defer_tcp_accept.request = req;
-
-			inet_csk_reset_keepalive_timer(child,
-						       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-		} else {
-			inet_csk_reqsk_queue_add(sk, req, child);
-		}
-
+		inet_csk_reqsk_queue_add(sk, req, child);
 		return child;
 
 	listen_overflow:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		goto death;
-	}
-
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;
 
-- 
cgit v1.2.3


From 68b80f11380889996aa7eadba29dbbb5c29a5864 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 17 Jun 2008 15:51:47 -0700
Subject: netfilter: nf_nat: fix RCU races

Fix three ct_extend/NAT extension related races:

- When cleaning up the extension area and removing it from the bysource hash,
  the nat->ct pointer must not be set to NULL since it may still be used in
  a RCU read side

- When replacing a NAT extension area in the bysource hash, the nat->ct
  pointer must be assigned before performing the replacement

- When reallocating extension storage in ct_extend, the old memory must
  not be freed immediately since it may still be used by a RCU read side

Possibly fixes https://bugzilla.redhat.com/show_bug.cgi?id=449315
and/or http://bugzilla.kernel.org/show_bug.cgi?id=10875

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_extend.h | 1 +
 net/ipv4/netfilter/nf_nat_core.c            | 3 +--
 net/netfilter/nf_conntrack_extend.c         | 9 ++++++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index f736e842977f..f80c0ed6d870 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -15,6 +15,7 @@ enum nf_ct_ext_id
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
+	struct rcu_head rcu;
 	u8 offset[NF_CT_EXT_NUM];
 	u8 len;
 	char data[0];
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 04578593e100..d2a887fc8d9b 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
-	nat->ct = NULL;
 	spin_unlock_bh(&nf_nat_lock);
 }
 
@@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old)
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	new_nat->ct = ct;
+	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
 
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bcc19fa4ed1e..8a3f8b34e466 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -59,12 +59,19 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 	if (!*ext)
 		return NULL;
 
+	INIT_RCU_HEAD(&(*ext)->rcu);
 	(*ext)->offset[id] = off;
 	(*ext)->len = len;
 
 	return (void *)(*ext) + off;
 }
 
+static void __nf_ct_ext_free_rcu(struct rcu_head *head)
+{
+	struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
+	kfree(ext);
+}
+
 void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
 	struct nf_ct_ext *new;
@@ -106,7 +113,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 					(void *)ct->ext + ct->ext->offset[i]);
 			rcu_read_unlock();
 		}
-		kfree(ct->ext);
+		call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
 		ct->ext = new;
 	}
 
-- 
cgit v1.2.3


From f630e43a215a3129d0c1173cae0bce6ea4855cf7 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 19 Jun 2008 16:33:57 -0700
Subject: ipv6: Drop packets for loopback address from outside of the box.

[ Based upon original report and patch by Karsten Keil.  Karsten
  has verified that this fixes the TAHI test case "ICMPv6 test
  v6LC.5.1.2 Part F". -DaveM ]

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h   | 6 ++++++
 net/ipv6/ip6_input.c | 9 +++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e0a612bc9c4e..f422f7218e1c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
 		 a->s6_addr32[2] | a->s6_addr32[3] ) == 0); 
 }
 
+static inline int ipv6_addr_loopback(const struct in6_addr *a)
+{
+	return ((a->s6_addr32[0] | a->s6_addr32[1] |
+		 a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0);
+}
+
 static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
 {
 	return ((a->s6_addr32[0] | a->s6_addr32[1] |
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832c..17eb48b8e329 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
+	/*
+	 * RFC4291 2.5.3
+	 * A packet received on an interface with a destination address
+	 * of loopback must be dropped.
+	 */
+	if (!(dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_loopback(&hdr->daddr))
+		goto err;
+
 	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
-- 
cgit v1.2.3


From b9f75f45a6b46a0ab4eb0857d437a0845871f314 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Jun 2008 22:16:51 -0700
Subject: netns: Don't receive new packets in a dead network namespace.

Alexey Dobriyan <adobriyan@gmail.com> writes:
> Subject: ICMP sockets destruction vs ICMP packets oops

> After icmp_sk_exit() nuked ICMP sockets, we get an interrupt.
> icmp_reply() wants ICMP socket.
>
> Steps to reproduce:
>
> 	launch shell in new netns
> 	move real NIC to netns
> 	setup routing
> 	ping -i 0
> 	exit from shell
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
> IP: [<ffffffff803fce17>] icmp_sk+0x17/0x30
> PGD 17f3cd067 PUD 17f3ce067 PMD 0
> Oops: 0000 [1] PREEMPT SMP DEBUG_PAGEALLOC
> CPU 0
> Modules linked in: usblp usbcore
> Pid: 0, comm: swapper Not tainted 2.6.26-rc6-netns-ct #4
> RIP: 0010:[<ffffffff803fce17>]  [<ffffffff803fce17>] icmp_sk+0x17/0x30
> RSP: 0018:ffffffff8057fc30  EFLAGS: 00010286
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff81017c7db900
> RDX: 0000000000000034 RSI: ffff81017c7db900 RDI: ffff81017dc41800
> RBP: ffffffff8057fc40 R08: 0000000000000001 R09: 000000000000a815
> R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8057fd28
> R13: ffffffff8057fd00 R14: ffff81017c7db938 R15: ffff81017dc41800
> FS:  0000000000000000(0000) GS:ffffffff80525000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
> CR2: 0000000000000000 CR3: 000000017fcda000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process swapper (pid: 0, threadinfo ffffffff8053a000, task ffffffff804fa4a0)
> Stack:  0000000000000000 ffff81017c7db900 ffffffff8057fcf0 ffffffff803fcfe4
>  ffffffff804faa38 0000000000000246 0000000000005a40 0000000000000246
>  000000000001ffff ffff81017dd68dc0 0000000000005a40 0000000055342436
> Call Trace:
>  <IRQ>  [<ffffffff803fcfe4>] icmp_reply+0x44/0x1e0
>  [<ffffffff803d3a0a>] ? ip_route_input+0x23a/0x1360
>  [<ffffffff803fd645>] icmp_echo+0x65/0x70
>  [<ffffffff803fd300>] icmp_rcv+0x180/0x1b0
>  [<ffffffff803d6d84>] ip_local_deliver+0xf4/0x1f0
>  [<ffffffff803d71bb>] ip_rcv+0x33b/0x650
>  [<ffffffff803bb16a>] netif_receive_skb+0x27a/0x340
>  [<ffffffff803be57d>] process_backlog+0x9d/0x100
>  [<ffffffff803bdd4d>] net_rx_action+0x18d/0x250
>  [<ffffffff80237be5>] __do_softirq+0x75/0x100
>  [<ffffffff8020c97c>] call_softirq+0x1c/0x30
>  [<ffffffff8020f085>] do_softirq+0x65/0xa0
>  [<ffffffff80237af7>] irq_exit+0x97/0xa0
>  [<ffffffff8020f198>] do_IRQ+0xa8/0x130
>  [<ffffffff80212ee0>] ? mwait_idle+0x0/0x60
>  [<ffffffff8020bc46>] ret_from_intr+0x0/0xf
>  <EOI>  [<ffffffff80212f2c>] ? mwait_idle+0x4c/0x60
>  [<ffffffff80212f23>] ? mwait_idle+0x43/0x60
>  [<ffffffff8020a217>] ? cpu_idle+0x57/0xa0
>  [<ffffffff8040f380>] ? rest_init+0x70/0x80
> Code: 10 5b 41 5c 41 5d 41 5e c9 c3 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 53
> 48 83 ec 08 48 8b 9f 78 01 00 00 e8 2b c7 f1 ff 89 c0 <48> 8b 04 c3 48 83 c4 08
> 5b c9 c3 66 66 66 66 66 2e 0f 1f 84 00
> RIP  [<ffffffff803fce17>] icmp_sk+0x17/0x30
>  RSP <ffffffff8057fc30>
> CR2: 0000000000000000
> ---[ end trace ea161157b76b33e8 ]---
> Kernel panic - not syncing: Aiee, killing interrupt handler!

Receiving packets while we are cleaning up a network namespace is a
racy proposition. It is possible when the packet arrives that we have
removed some but not all of the state we need to fully process it.  We
have the choice of either playing wack-a-mole with the cleanup routines
or simply dropping packets when we don't have a network namespace to
handle them.

Since the check looks inexpensive in netif_receive_skb let's just
drop the incoming packets.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 11 +++++++++++
 net/core/dev.c              |  4 ++++
 net/core/net_namespace.c    |  3 +++
 3 files changed, 18 insertions(+)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index aa540e6be502..d9dd0f707296 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -95,6 +95,11 @@ extern struct list_head net_namespace_list;
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
 
+static inline int net_alive(struct net *net)
+{
+	return net && atomic_read(&net->count);
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	atomic_inc(&net->count);
@@ -125,6 +130,12 @@ int net_eq(const struct net *net1, const struct net *net2)
 	return net1 == net2;
 }
 #else
+
+static inline int net_alive(struct net *net)
+{
+	return 1;
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	return net;
diff --git a/net/core/dev.c b/net/core/dev.c
index 68d8df0992ab..c421a1f8f0b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	/* Don't receive packets in an exiting network namespace */
+	if (!net_alive(dev_net(skb->dev)))
+		goto out;
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd84..7c52fe277b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
 	struct pernet_operations *ops;
 	struct net *net;
 
+	/* Be very certain incoming network packets will not find us */
+	rcu_barrier();
+
 	net = container_of(work, struct net, work);
 
 	mutex_lock(&net_mutex);
-- 
cgit v1.2.3


From 23976efedd5ecb420b87455787c537eb4aed1981 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sat, 28 Jun 2008 02:50:13 +0300
Subject: mac80211: don't accept WEP keys other than WEP40 and WEP104

This patch makes mac80211 refuse a WEP key whose length is not WEP40 nor
WEP104.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 9 +++++++++
 net/mac80211/wext.c    | 7 +++++++
 2 files changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dae3f9ec1154..bcd1623245cb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -595,6 +595,15 @@ enum ieee80211_key_alg {
 	ALG_CCMP,
 };
 
+/**
+ * enum ieee80211_key_len - key length
+ * @WEP40: WEP 5 byte long key
+ * @WEP104: WEP 13 byte long key
+ */
+enum ieee80211_key_len {
+	LEN_WEP40 = 5,
+	LEN_WEP104 = 13,
+};
 
 /**
  * enum ieee80211_key_flags - key flags
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 6106cb79060c..e8404212ad57 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr,
 			}
 		}
 
+		if (alg == ALG_WEP &&
+			key_len != LEN_WEP40 && key_len != LEN_WEP104) {
+			ieee80211_key_free(key);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
 		ieee80211_key_link(key, sdata, sta);
 
 		if (set_tx_key || (!sta && !sdata->default_key && key))
-- 
cgit v1.2.3


From ff31ab56c0e900235f653e375fc3b01ba2d8d6a3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 1 Jul 2008 19:52:38 -0700
Subject: net-sched: change tcf_destroy_chain() to clear start of filter list

Pass double tcf_proto pointers to tcf_destroy_chain() to make it
clear the start of the filter list for more consistency.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 net/mac80211/wme.c        | 3 +--
 net/sched/sch_api.c       | 6 +++---
 net/sched/sch_atm.c       | 5 ++---
 net/sched/sch_cbq.c       | 8 +++-----
 net/sched/sch_dsmark.c    | 2 +-
 net/sched/sch_hfsc.c      | 2 +-
 net/sched/sch_htb.c       | 4 ++--
 net/sched/sch_ingress.c   | 2 +-
 net/sched/sch_prio.c      | 2 +-
 net/sched/sch_sfq.c       | 2 +-
 11 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ab502ec1c61c..a87fc0312edc 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -178,7 +178,7 @@ extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct Qdisc_ops *ops, u32 parentid);
 extern void tcf_destroy(struct tcf_proto *tp);
-extern void tcf_destroy_chain(struct tcf_proto *fl);
+extern void tcf_destroy_chain(struct tcf_proto **fl);
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 635b996c8c35..5d09e8698b57 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -323,8 +323,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 	struct ieee80211_hw *hw = &local->hw;
 	int queue;
 
-	tcf_destroy_chain(q->filter_list);
-	q->filter_list = NULL;
+	tcf_destroy_chain(&q->filter_list);
 
 	for (queue=0; queue < hw->queues; queue++) {
 		skb_queue_purge(&q->requeued[queue]);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c40773cdbe45..10f01ad04380 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1252,12 +1252,12 @@ void tcf_destroy(struct tcf_proto *tp)
 	kfree(tp);
 }
 
-void tcf_destroy_chain(struct tcf_proto *fl)
+void tcf_destroy_chain(struct tcf_proto **fl)
 {
 	struct tcf_proto *tp;
 
-	while ((tp = fl) != NULL) {
-		fl = tp->next;
+	while ((tp = *fl) != NULL) {
+		*fl = tp->next;
 		tcf_destroy(tp);
 	}
 }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 335273416384..8e5f70ba3a15 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -160,7 +160,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	*prev = flow->next;
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
-	tcf_destroy_chain(flow->filter_list);
+	tcf_destroy_chain(&flow->filter_list);
 	if (flow->sock) {
 		pr_debug("atm_tc_put: f_count %d\n",
 			file_count(flow->sock->file));
@@ -588,8 +588,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
 	/* races ? */
 	while ((flow = p->flows)) {
-		tcf_destroy_chain(flow->filter_list);
-		flow->filter_list = NULL;
+		tcf_destroy_chain(&flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
 			       flow->ref);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 09969c1fbc08..2a3c97f7dc63 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1704,7 +1704,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 
 	BUG_TRAP(!cl->filters);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1728,10 +1728,8 @@ cbq_destroy(struct Qdisc* sch)
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
 	for (h = 0; h < 16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next) {
-			tcf_destroy_chain(cl->filter_list);
-			cl->filter_list = NULL;
-		}
+		for (cl = q->classes[h]; cl; cl = cl->next)
+			tcf_destroy_chain(&cl->filter_list);
 	}
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 64465bacbe79..c4c1317cd47d 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -416,7 +416,7 @@ static void dsmark_destroy(struct Qdisc *sch)
 
 	pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
 
-	tcf_destroy_chain(p->filter_list);
+	tcf_destroy_chain(&p->filter_list);
 	qdisc_destroy(p->q);
 	kfree(p->mask);
 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fdfaa3fcc16d..eca83a3be293 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1123,7 +1123,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 	qdisc_destroy(cl->qdisc);
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
 	if (cl != &q->root)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6807c97985a5..3fb58f428f72 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1238,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 
 	while (!list_empty(&cl->children))
 		htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1267,7 +1267,7 @@ static void htb_destroy(struct Qdisc *sch)
 	   and surprisingly it worked in 2.4. But it must precede it
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 274b1ddb160c..956c80ad5965 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = qdisc_priv(sch);
 
-	tcf_destroy_chain(p->filter_list);
+	tcf_destroy_chain(&p->filter_list);
 }
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4aa2b45dad0a..5532f1031ab5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -219,7 +219,7 @@ prio_destroy(struct Qdisc* sch)
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 	for (prio=0; prio<q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f0463d757a98..6a97afbfb952 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 }
-- 
cgit v1.2.3