From fff9cfd99c0f88645c3f50d7476d6c8cef99f140 Mon Sep 17 00:00:00 2001
From:  <jt@hpl.hp.com>
Date: Thu, 12 May 2005 20:24:19 -0400
Subject:   [PATCH] Wireless Extensions 18 (aka WPA)

        This is version 18 of the Wireless Extensions. The main change
  is that it adds all the necessary APIs for WPA and WPA2 support. This
  work was entirely done by Jouni Malinen, so let's thank him for both
  his hard work and deep expertise on the subject ;-)
        This APIs obviously doesn't do much by itself and works in
  concert with driver support (Jouni already sent you the HostAP
  changes) and userspace (Jouni is updating wpa_supplicant). This is
  also orthogonal with the ongoing work on in-kernel IEEE support (but
  potentially useful).
        The patch is attached, tested with 2.6.11. Normally, I would
  ask you to push that directly in the kernel (99% of the patch has been
  on my web page for ages and it does not affect non-WPA stuff), but
  Jouni convinced me that it should bake a few weeks in wireless-2.6
  first, so that other driver maintainers can get up to speed with it.

  Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 net/core/wireless.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

(limited to 'net/core')

diff --git a/net/core/wireless.c b/net/core/wireless.c
index 750cc5daeb03..b2fe378dfbf8 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -187,6 +187,12 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
+	[SIOCSIWMLME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_mlme),
+		.max_tokens	= sizeof(struct iw_mlme),
+	},
 	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
@@ -195,7 +201,10 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
 	[SIOCSIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= 0,
+		.max_tokens	= sizeof(struct iw_scan_req),
 	},
 	[SIOCGIWSCAN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
@@ -273,6 +282,42 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 	[SIOCGIWPOWER	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
+	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_pmksa),
+		.max_tokens	= sizeof(struct iw_pmksa),
+	},
 };
 static const int standard_ioctl_num = (sizeof(standard_ioctl) /
 				       sizeof(struct iw_ioctl_description));
@@ -299,6 +344,31 @@ static const struct iw_ioctl_description standard_event[] = {
 	[IWEVEXPIRED	- IWEVFIRST] = {
 		.header_type	= IW_HEADER_TYPE_ADDR, 
 	},
+	[IWEVGENIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT, 
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_michaelmicfailure),
+	},
+	[IWEVASSOCREQIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVASSOCRESPIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVPMKIDCAND	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_pmkid_cand),
+	},
 };
 static const int standard_event_num = (sizeof(standard_event) /
 				       sizeof(struct iw_ioctl_description));
-- 
cgit v1.2.3


From d8a33ac435c43a1a404b2ec560ef1d1536710c36 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sun, 29 May 2005 14:13:47 -0700
Subject: [BRIDGE]: features change notification

Resend of earlier patch (no changes) from Catalin used to provide
device feature change notification.

Signed-off-by: Catalin BOIE <catab at umbrella.ro>
Acked-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/linux/notifier.h  |  1 +
 net/core/dev.c            | 12 ++++++++++++
 net/core/ethtool.c        |  8 +++++++-
 4 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b25bd02720d3..d8c65ecef9d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -913,6 +913,7 @@ extern void		dev_mc_discard(struct net_device *dev);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
+extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(const char *name);
 extern void		dev_mcast_init(void);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9303a003e9ab..5937dd6053c3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -56,6 +56,7 @@ extern int notifier_call_chain(struct notifier_block **n, unsigned long val, voi
 #define NETDEV_CHANGEADDR	0x0008
 #define NETDEV_GOING_DOWN	0x0009
 #define NETDEV_CHANGENAME	0x000A
+#define NETDEV_FEAT_CHANGE	0x000B
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index d4d9e2680adb..f15a3ffff635 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -760,6 +760,18 @@ int dev_change_name(struct net_device *dev, char *newname)
 	return err;
 }
 
+/**
+ *	netdev_features_change - device changes fatures
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed features.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+
 /**
  *	netdev_state_change - device changes state
  *	@dev: device to cause notification
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f05fde97c43d..252bfc6f03f4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -682,6 +682,7 @@ int dev_ethtool(struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
+	int old_features;
 
 	/*
 	 * XXX: This can be pushed down into the ethtool_* handlers that
@@ -703,6 +704,8 @@ int dev_ethtool(struct ifreq *ifr)
 		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
 			return rc;
 
+	old_features = dev->features;
+
 	switch (ethcmd) {
 	case ETHTOOL_GSET:
 		rc = ethtool_get_settings(dev, useraddr);
@@ -712,7 +715,6 @@ int dev_ethtool(struct ifreq *ifr)
 		break;
 	case ETHTOOL_GDRVINFO:
 		rc = ethtool_get_drvinfo(dev, useraddr);
-
 		break;
 	case ETHTOOL_GREGS:
 		rc = ethtool_get_regs(dev, useraddr);
@@ -801,6 +803,10 @@ int dev_ethtool(struct ifreq *ifr)
 	
 	if(dev->ethtool_ops->complete)
 		dev->ethtool_ops->complete(dev);
+
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
 	return rc;
 
  ioctl:
-- 
cgit v1.2.3


From 81e8157583c559c27aac75c708d40a35f563d734 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sun, 29 May 2005 14:14:35 -0700
Subject: [BRIDGE]: make dev->features unsigned

The features field in netdevice is really a bitmask, and bitmask's should
be unsigned.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 net/core/ethtool.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8c65ecef9d9..470af8c1a4a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -401,7 +401,7 @@ struct net_device
 	} reg_state;
 
 	/* Net device features */
-	int			features;
+	unsigned long		features;
 #define NETIF_F_SG		1	/* Scatter/gather IO. */
 #define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
 #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 252bfc6f03f4..2a56a5218363 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -682,7 +682,7 @@ int dev_ethtool(struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
-	int old_features;
+	unsigned long old_features;
 
 	/*
 	 * XXX: This can be pushed down into the ethtool_* handlers that
-- 
cgit v1.2.3


From 69f6a0fafcdf0bfe85af182695d6d38f80f9d549 Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@us.ibm.com>
Date: Sun, 29 May 2005 20:27:24 -0700
Subject: [NET]: Add ethtool support for NETIF_F_HW_CSUM.

Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  1 +
 net/core/ethtool.c      | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c85b210490ea..a0ab26aab450 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -256,6 +256,7 @@ struct net_device;
 u32 ethtool_op_get_link(struct net_device *dev);
 u32 ethtool_op_get_tx_csum(struct net_device *dev);
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
 u32 ethtool_op_get_sg(struct net_device *dev);
 int ethtool_op_set_sg(struct net_device *dev, u32 data);
 u32 ethtool_op_get_tso(struct net_device *dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 2a56a5218363..8ec484894d68 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
-	return (dev->features & NETIF_F_IP_CSUM) != 0;
+	return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
 }
 
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
 	return 0;
 }
 
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_HW_CSUM;
+	else
+		dev->features &= ~NETIF_F_HW_CSUM;
+
+	return 0;
+}
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_SG) != 0;
@@ -823,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum);
 EXPORT_SYMBOL(ethtool_op_set_sg);
 EXPORT_SYMBOL(ethtool_op_set_tso);
 EXPORT_SYMBOL(ethtool_op_set_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-- 
cgit v1.2.3


From d1102b59ca7b3a3c58912330a4ae38f549c8d569 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 29 May 2005 20:28:25 -0700
Subject: [NET]: Use %lx for netdev->features sysfs formatting.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 060f703659e8..910eb4c05a47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
 #define to_net_dev(class) container_of(class, struct net_device, class_dev)
 
 static const char fmt_hex[] = "%#x\n";
+static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
 
@@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)		\
 NETDEVICE_ATTR(addr_len, fmt_dec);
 NETDEVICE_ATTR(iflink, fmt_dec);
 NETDEVICE_ATTR(ifindex, fmt_dec);
-NETDEVICE_ATTR(features, fmt_hex);
+NETDEVICE_ATTR(features, fmt_long_hex);
 NETDEVICE_ATTR(type, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
-- 
cgit v1.2.3


From fa04ae5c09f3dfedbc923c2954a9a26a573833f1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 6 Jun 2005 15:07:19 -0700
Subject: [ETHTOOL]: Check correct pointer in ethtool_set_coalesce().

It was checking the "GET" function pointer instead of
the "SET" one.  Looks like a cut&paste error :-)

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8ec484894d68..a3eeb88e1c81 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -356,7 +356,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
-	if (!dev->ethtool_ops->get_coalesce)
+	if (!dev->ethtool_ops->set_coalesce)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
-- 
cgit v1.2.3


From 699a411451a32cc111410f44f172b265f6d679c8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 8 Jun 2005 14:55:42 -0700
Subject: [NET]: Allow controlling NAPI device weight with sysfs

Simple interface to allow changing network device scheduling weight
with sysfs. Please consider this for 2.6.12, since risk/impact is small.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'net/core')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 910eb4c05a47..e2137f3e489d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -185,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz
 static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 
 			 store_tx_queue_len);
 
+NETDEVICE_SHOW(weight, fmt_dec);
+
+static int change_weight(struct net_device *net, unsigned long new_weight)
+{
+	net->weight = new_weight;
+	return 0;
+}
+
+static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_weight);
+}
+
+static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, 
+			 store_weight);
+
 
 static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_ifindex,
@@ -194,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_features,
 	&class_device_attr_mtu,
 	&class_device_attr_flags,
+	&class_device_attr_weight,
 	&class_device_attr_type,
 	&class_device_attr_address,
 	&class_device_attr_broadcast,
-- 
cgit v1.2.3


From e3876605450979fe52a1a03e7eb78a89bf59e76a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 8 Jun 2005 14:56:01 -0700
Subject: [NET]: Fix sysctl net.core.dev_weight

Changing the sysctl net.core.dev_weight has no effect because the weight
of the backlog devices is set during initialization and never changed.

This patch propagates any changes to the global value affected by sysctl
to the per-cpu devices. It is done every time the packet handler
function is run.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/core')

diff --git a/net/core/dev.c b/net/core/dev.c
index f15a3ffff635..ab935778ce81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1744,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
+	backlog_dev->weight = weight_p;
 	for (;;) {
 		struct sk_buff *skb;
 		struct net_device *dev;
-- 
cgit v1.2.3


From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:46:52 -0700
Subject: [NET] Generalise TCP's struct open_request minisock infrastructure

Kept this first changeset minimal, without changing existing names to
ease peer review.

Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:

->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
  a specific protocol

The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.

I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.

Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)

Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h         | 21 ++++++++++
 include/linux/ipv6.h       | 13 +++++++
 include/linux/tcp.h        | 11 ++++++
 include/net/request_sock.h | 77 +++++++++++++++++++++++++++++++++++++
 include/net/sock.h         |  4 ++
 include/net/tcp.h          | 87 ++++++-----------------------------------
 include/net/tcp_ecn.h      |  7 ++--
 net/core/sock.c            | 35 +++++++++++++++++
 net/ipv4/syncookies.c      | 39 ++++++++++---------
 net/ipv4/tcp.c             |  9 -----
 net/ipv4/tcp_diag.c        | 25 ++++++------
 net/ipv4/tcp_ipv4.c        | 72 +++++++++++++++++++---------------
 net/ipv4/tcp_minisocks.c   | 48 ++++++++++++-----------
 net/ipv4/tcp_output.c      | 25 ++++++------
 net/ipv4/tcp_timer.c       |  2 +-
 net/ipv6/tcp_ipv6.c        | 96 ++++++++++++++++++++++++----------------------
 16 files changed, 341 insertions(+), 230 deletions(-)
 create mode 100644 include/net/request_sock.h

(limited to 'net/core')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 8438c68591f9..d5b7c907204e 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -81,6 +81,7 @@
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/types.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <linux/igmp.h>
 #include <net/flow.h>
@@ -107,6 +108,26 @@ struct ip_options {
 
 #define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
 
+struct inet_request_sock {
+	struct open_request	req;
+	u32			loc_addr;
+	u32			rmt_addr;
+	u16			rmt_port;
+	u16			snd_wscale : 4, 
+				rcv_wscale : 4, 
+				tstamp_ok  : 1,
+				sack_ok	   : 1,
+				wscale_ok  : 1,
+				ecn_ok	   : 1,
+				acked	   : 1;
+	struct ip_options	*opt;
+};
+
+static inline struct inet_request_sock *inet_rsk(const struct open_request *sk)
+{
+	return (struct inet_request_sock *)sk;
+}
+
 struct ipv6_pinfo;
 
 struct inet_sock {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ab0d0efbf240..98acdbf3d446 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,19 @@ struct inet6_skb_parm {
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
 
+struct tcp6_request_sock {
+	struct tcp_request_sock	req;
+	struct in6_addr		loc_addr;
+	struct in6_addr		rmt_addr;
+	struct sk_buff		*pktopts;
+	int			iif;
+};
+
+static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk)
+{
+	return (struct tcp6_request_sock *)sk;
+}
+
 /**
  * struct ipv6_pinfo - ipv6 private area
  *
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14a55e3e3a50..86771b37b80d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,17 @@ struct tcp_options_received {
 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+struct tcp_request_sock {
+	struct inet_request_sock req;
+	__u32			 rcv_isn;
+	__u32			 snt_isn;
+};
+
+static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req)
+{
+	return (struct tcp_request_sock *)req;
+}
+
 struct tcp_sock {
 	/* inet_sock has to be the first member of tcp_sock */
 	struct inet_sock	inet;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
new file mode 100644
index 000000000000..9502f5587931
--- /dev/null
+++ b/include/net/request_sock.h
@@ -0,0 +1,77 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ *		Definitions for request_sock 
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * 		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#ifndef _REQUEST_SOCK_H
+#define _REQUEST_SOCK_H
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <net/sock.h>
+
+struct open_request;
+struct sk_buff;
+struct dst_entry;
+struct proto;
+
+struct or_calltable {
+	int		family;
+	kmem_cache_t	*slab;
+	int		obj_size;
+	int		(*rtx_syn_ack)(struct sock *sk,
+				       struct open_request *req,
+				       struct dst_entry *dst);
+	void		(*send_ack)(struct sk_buff *skb,
+				    struct open_request *req);
+	void		(*send_reset)(struct sk_buff *skb);
+	void		(*destructor)(struct open_request *req);
+};
+
+/* struct open_request - mini sock to represent a connection request
+ */
+struct open_request {
+	struct open_request		*dl_next; /* Must be first member! */
+	u16				mss;
+	u8				retrans;
+	u8				__pad;
+	/* The following two fields can be easily recomputed I think -AK */
+	u32				window_clamp; /* window clamp at creation time */
+	u32				rcv_wnd;	  /* rcv_wnd offered first time */
+	u32				ts_recent;
+	unsigned long			expires;
+	struct or_calltable		*class;
+	struct sock			*sk;
+};
+
+static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class)
+{
+	struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC);
+
+	if (req != NULL)
+		req->class = class;
+
+	return req;
+}
+
+static inline void tcp_openreq_fastfree(struct open_request *req)
+{
+	kmem_cache_free(req->class->slab, req);
+}
+
+static inline void tcp_openreq_free(struct open_request *req)
+{
+	req->class->destructor(req);
+	tcp_openreq_fastfree(req);
+}
+
+#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index a9ef3a6a13f3..6919276af8af 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -484,6 +484,8 @@ extern void sk_stream_kill_queues(struct sock *sk);
 
 extern int sk_wait_data(struct sock *sk, long *timeo);
 
+struct or_calltable;
+
 /* Networking protocol blocks we attach to sockets.
  * socket layer -> transport layer interface
  * transport -> network interface is defined by struct inet_proto
@@ -547,6 +549,8 @@ struct proto {
 	kmem_cache_t		*slab;
 	unsigned int		obj_size;
 
+	struct or_calltable	*rsk_prot;
+
 	struct module		*owner;
 
 	char			name[32];
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e71f8ba3e101..d438ba566b89 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <net/checksum.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 #include <net/ip.h>
@@ -613,74 +614,6 @@ extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
-struct open_request;
-
-struct or_calltable {
-	int  family;
-	int  (*rtx_syn_ack)	(struct sock *sk, struct open_request *req, struct dst_entry*);
-	void (*send_ack)	(struct sk_buff *skb, struct open_request *req);
-	void (*destructor)	(struct open_request *req);
-	void (*send_reset)	(struct sk_buff *skb);
-};
-
-struct tcp_v4_open_req {
-	__u32			loc_addr;
-	__u32			rmt_addr;
-	struct ip_options	*opt;
-};
-
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-struct tcp_v6_open_req {
-	struct in6_addr		loc_addr;
-	struct in6_addr		rmt_addr;
-	struct sk_buff		*pktopts;
-	int			iif;
-};
-#endif
-
-/* this structure is too big */
-struct open_request {
-	struct open_request	*dl_next; /* Must be first member! */
-	__u32			rcv_isn;
-	__u32			snt_isn;
-	__u16			rmt_port;
-	__u16			mss;
-	__u8			retrans;
-	__u8			__pad;
-	__u16	snd_wscale : 4, 
-		rcv_wscale : 4, 
-		tstamp_ok : 1,
-		sack_ok : 1,
-		wscale_ok : 1,
-		ecn_ok : 1,
-		acked : 1;
-	/* The following two fields can be easily recomputed I think -AK */
-	__u32			window_clamp;	/* window clamp at creation time */
-	__u32			rcv_wnd;	/* rcv_wnd offered first time */
-	__u32			ts_recent;
-	unsigned long		expires;
-	struct or_calltable	*class;
-	struct sock		*sk;
-	union {
-		struct tcp_v4_open_req v4_req;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		struct tcp_v6_open_req v6_req;
-#endif
-	} af;
-};
-
-/* SLAB cache for open requests. */
-extern kmem_cache_t *tcp_openreq_cachep;
-
-#define tcp_openreq_alloc()		kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
-#define tcp_openreq_fastfree(req)	kmem_cache_free(tcp_openreq_cachep, req)
-
-static inline void tcp_openreq_free(struct open_request *req)
-{
-	req->class->destructor(req);
-	tcp_openreq_fastfree(req);
-}
-
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
@@ -1832,17 +1765,19 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
 					struct tcp_options_received *rx_opt,
 					struct sk_buff *skb)
 {
+	struct inet_request_sock *ireq = inet_rsk(req);
+
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
-	req->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
-	req->tstamp_ok = rx_opt->tstamp_ok;
-	req->sack_ok = rx_opt->sack_ok;
-	req->snd_wscale = rx_opt->snd_wscale;
-	req->wscale_ok = rx_opt->wscale_ok;
-	req->acked = 0;
-	req->ecn_ok = 0;
-	req->rmt_port = skb->h.th->source;
+	ireq->tstamp_ok = rx_opt->tstamp_ok;
+	ireq->sack_ok = rx_opt->sack_ok;
+	ireq->snd_wscale = rx_opt->snd_wscale;
+	ireq->wscale_ok = rx_opt->wscale_ok;
+	ireq->acked = 0;
+	ireq->ecn_ok = 0;
+	ireq->rmt_port = skb->h.th->source;
 }
 
 extern void tcp_enter_memory_pressure(void);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index dc1456389a97..94ad970e844a 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -2,6 +2,7 @@
 #define _NET_TCP_ECN_H_ 1
 
 #include <net/inet_ecn.h>
+#include <net/request_sock.h>
 
 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
 
@@ -40,7 +41,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp,
 static __inline__ void
 TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th)
 {
-	if (req->ecn_ok)
+	if (inet_rsk(req)->ecn_ok)
 		th->ece = 1;
 }
 
@@ -113,14 +114,14 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct open_request *req)
 {
-	tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0;
+	tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
 }
 
 static __inline__ void
 TCP_ECN_create_request(struct open_request *req, struct tcphdr *th)
 {
 	if (sysctl_tcp_ecn && th->ece && th->cwr)
-		req->ecn_ok = 1;
+		inet_rsk(req)->ecn_ok = 1;
 }
 
 #endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 96e00b08698f..a6ec3ada7f9e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,7 @@
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -1363,6 +1364,7 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	char *request_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1374,6 +1376,25 @@ int proto_register(struct proto *prot, int alloc_slab)
 			       prot->name);
 			goto out;
 		}
+
+		if (prot->rsk_prot != NULL) {
+			static const char mask[] = "request_sock_%s";
+
+			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			if (request_sock_slab_name == NULL)
+				goto out_free_sock_slab;
+
+			sprintf(request_sock_slab_name, mask, prot->name);
+			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1382,6 +1403,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_request_sock_slab_name:
+	kfree(request_sock_slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+	goto out;
 }
 
 EXPORT_SYMBOL(proto_register);
@@ -1395,6 +1422,14 @@ void proto_unregister(struct proto *prot)
 		prot->slab = NULL;
 	}
 
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		const char *name = kmem_cache_name(prot->rsk_prot->slab);
+
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(name);
+		prot->rsk_prot->slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e923d2f021aa..dd47e6da6fb3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -190,6 +190,8 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
+	struct inet_request_sock *ireq;
+	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1; 
 	struct sock *ret = sk;
@@ -209,19 +211,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
 
-	req = tcp_openreq_alloc();
 	ret = NULL;
+	req = tcp_openreq_alloc(&or_ipv4); /* for safety */
 	if (!req)
 		goto out;
 
-	req->rcv_isn		= htonl(skb->h.th->seq) - 1;
-	req->snt_isn		= cookie; 
+	ireq = inet_rsk(req);
+	treq = tcp_rsk(req);
+	treq->rcv_isn		= htonl(skb->h.th->seq) - 1;
+	treq->snt_isn		= cookie; 
 	req->mss		= mss;
- 	req->rmt_port		= skb->h.th->source;
-	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
-	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
-	req->class		= &or_ipv4; /* for savety */
-	req->af.v4_req.opt	= NULL;
+ 	ireq->rmt_port		= skb->h.th->source;
+	ireq->loc_addr		= skb->nh.iph->daddr;
+	ireq->rmt_addr		= skb->nh.iph->saddr;
+	ireq->opt		= NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -229,17 +232,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	if (opt && opt->optlen) {
 		int opt_size = sizeof(struct ip_options) + opt->optlen;
 
-		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
-		if (req->af.v4_req.opt) {
-			if (ip_options_echo(req->af.v4_req.opt, skb)) {
-				kfree(req->af.v4_req.opt);
-				req->af.v4_req.opt = NULL;
-			}
+		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
+			kfree(ireq->opt);
+			ireq->opt = NULL;
 		}
 	}
 
-	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
-	req->wscale_ok	= req->sack_ok = 0; 
+	ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
+	ireq->wscale_ok	 = ireq->sack_ok = 0; 
 	req->expires	= 0UL; 
 	req->retrans	= 0; 
 	
@@ -253,8 +254,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = ((opt && opt->srr) ?
 							  opt->faddr :
-							  req->af.v4_req.rmt_addr),
-						.saddr = req->af.v4_req.loc_addr,
+							  ireq->rmt_addr),
+						.saddr = ireq->loc_addr,
 						.tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
 				    .uli_u = { .ports =
@@ -272,7 +273,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 				  &req->rcv_wnd, &req->window_clamp, 
 				  0, &rcv_wscale);
 	/* BTW win scale with syncookies is 0 by definition */
-	req->rcv_wscale	  = rcv_wscale; 
+	ireq->rcv_wscale  = rcv_wscale; 
 
 	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
 out:	return ret;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d9a4fd5f1a4..a3cabfa2022a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
-kmem_cache_t *tcp_openreq_cachep;
 kmem_cache_t *tcp_bucket_cachep;
 kmem_cache_t *tcp_timewait_cachep;
 
@@ -2271,13 +2270,6 @@ void __init tcp_init(void)
 		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
 					   sizeof(skb->cb));
 
-	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
-						   sizeof(struct open_request),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL, NULL);
-	if (!tcp_openreq_cachep)
-		panic("tcp_init: Cannot alloc open_request cache.");
-
 	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
 					      sizeof(struct tcp_bind_bucket),
 					      0, SLAB_HWCACHE_ALIGN,
@@ -2374,7 +2366,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
-EXPORT_SYMBOL(tcp_openreq_cachep);
 EXPORT_SYMBOL(tcp_poll);
 EXPORT_SYMBOL(tcp_read_sock);
 EXPORT_SYMBOL(tcp_recvmsg);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8faa8948f75c..700ff2413588 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -458,6 +458,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
 			    struct open_request *req,
 			    u32 pid, u32 seq)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
 	unsigned char *b = skb->tail;
 	struct tcpdiagmsg *r;
@@ -482,9 +483,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
 		tmo = 0;
 
 	r->id.tcpdiag_sport = inet->sport;
-	r->id.tcpdiag_dport = req->rmt_port;
-	r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr;
-	r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr;
+	r->id.tcpdiag_dport = ireq->rmt_port;
+	r->id.tcpdiag_src[0] = ireq->loc_addr;
+	r->id.tcpdiag_dst[0] = ireq->rmt_addr;
 	r->tcpdiag_expires = jiffies_to_msecs(tmo),
 	r->tcpdiag_rqueue = 0;
 	r->tcpdiag_wqueue = 0;
@@ -493,9 +494,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 	if (r->tcpdiag_family == AF_INET6) {
 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
-			       &req->af.v6_req.loc_addr);
+			       &tcp6_rsk(req)->loc_addr);
 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
-			       &req->af.v6_req.rmt_addr);
+			       &tcp6_rsk(req)->rmt_addr);
 	}
 #endif
 	nlh->nlmsg_len = skb->tail - b;
@@ -545,9 +546,11 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 		reqnum = 0;
 		for (req = head; req; reqnum++, req = req->dl_next) {
+			struct inet_request_sock *ireq = inet_rsk(req);
+
 			if (reqnum < s_reqnum)
 				continue;
-			if (r->id.tcpdiag_dport != req->rmt_port &&
+			if (r->id.tcpdiag_dport != ireq->rmt_port &&
 			    r->id.tcpdiag_dport)
 				continue;
 
@@ -555,16 +558,16 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 				entry.saddr =
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 					(entry.family == AF_INET6) ?
-					req->af.v6_req.loc_addr.s6_addr32 :
+					tcp6_rsk(req)->loc_addr.s6_addr32 :
 #endif
-					&req->af.v4_req.loc_addr;
+					&ireq->loc_addr;
 				entry.daddr = 
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 					(entry.family == AF_INET6) ?
-					req->af.v6_req.rmt_addr.s6_addr32 :
+					tcp6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
-					&req->af.v4_req.rmt_addr;
-				entry.dport = ntohs(req->rmt_port);
+					&ireq->rmt_addr;
+				entry.dport = ntohs(ireq->rmt_port);
 
 				if (!tcpdiag_bc_run(RTA_DATA(bc),
 						    RTA_PAYLOAD(bc), &entry))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dad98e4a5043..e156be90df14 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -880,9 +880,11 @@ static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
 	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		if (req->rmt_port == rport &&
-		    req->af.v4_req.rmt_addr == raddr &&
-		    req->af.v4_req.loc_addr == laddr &&
+		const struct inet_request_sock *ireq = inet_rsk(req);
+
+		if (ireq->rmt_port == rport &&
+		    ireq->rmt_addr == raddr &&
+		    ireq->loc_addr == laddr &&
 		    TCP_INET_FAMILY(req->class->family)) {
 			BUG_TRAP(!req->sk);
 			*prevp = prev;
@@ -897,7 +899,7 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_listen_opt *lopt = tp->listen_opt;
-	u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+	u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
 	req->expires = jiffies + TCP_TIMEOUT_INIT;
 	req->retrans = 0;
@@ -1065,7 +1067,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 		 */
 		BUG_TRAP(!req->sk);
 
-		if (seq != req->snt_isn) {
+		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
@@ -1256,7 +1258,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
 {
-	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
+	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent);
 }
 
@@ -1264,18 +1266,19 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
 					  struct open_request *req)
 {
 	struct rtable *rt;
-	struct ip_options *opt = req->af.v4_req.opt;
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct ip_options *opt = inet_rsk(req)->opt;
 	struct flowi fl = { .oif = sk->sk_bound_dev_if,
 			    .nl_u = { .ip4_u =
 				      { .daddr = ((opt && opt->srr) ?
 						  opt->faddr :
-						  req->af.v4_req.rmt_addr),
-					.saddr = req->af.v4_req.loc_addr,
+						  ireq->rmt_addr),
+					.saddr = ireq->loc_addr,
 					.tos = RT_CONN_FLAGS(sk) } },
 			    .proto = IPPROTO_TCP,
 			    .uli_u = { .ports =
 				       { .sport = inet_sk(sk)->sport,
-					 .dport = req->rmt_port } } };
+					 .dport = ireq->rmt_port } } };
 
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1297,6 +1300,7 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
 static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
 			      struct dst_entry *dst)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	int err = -1;
 	struct sk_buff * skb;
 
@@ -1310,14 +1314,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
 		struct tcphdr *th = skb->h.th;
 
 		th->check = tcp_v4_check(th, skb->len,
-					 req->af.v4_req.loc_addr,
-					 req->af.v4_req.rmt_addr,
+					 ireq->loc_addr,
+					 ireq->rmt_addr,
 					 csum_partial((char *)th, skb->len,
 						      skb->csum));
 
-		err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
-					    req->af.v4_req.rmt_addr,
-					    req->af.v4_req.opt);
+		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+					    ireq->rmt_addr,
+					    ireq->opt);
 		if (err == NET_XMIT_CN)
 			err = 0;
 	}
@@ -1332,8 +1336,8 @@ out:
  */
 static void tcp_v4_or_free(struct open_request *req)
 {
-	if (req->af.v4_req.opt)
-		kfree(req->af.v4_req.opt);
+	if (inet_rsk(req)->opt)
+		kfree(inet_rsk(req)->opt);
 }
 
 static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1387,6 +1391,7 @@ int sysctl_max_syn_backlog = 256;
 
 struct or_calltable or_ipv4 = {
 	.family		=	PF_INET,
+	.obj_size	=	sizeof(struct tcp_request_sock),
 	.rtx_syn_ack	=	tcp_v4_send_synack,
 	.send_ack	=	tcp_v4_or_send_ack,
 	.destructor	=	tcp_v4_or_free,
@@ -1395,6 +1400,7 @@ struct or_calltable or_ipv4 = {
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct inet_request_sock *ireq;
 	struct tcp_options_received tmp_opt;
 	struct open_request *req;
 	__u32 saddr = skb->nh.iph->saddr;
@@ -1433,7 +1439,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
 		goto drop;
 
-	req = tcp_openreq_alloc();
+	req = tcp_openreq_alloc(&or_ipv4);
 	if (!req)
 		goto drop;
 
@@ -1461,10 +1467,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	req->af.v4_req.loc_addr = daddr;
-	req->af.v4_req.rmt_addr = saddr;
-	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
-	req->class = &or_ipv4;
+	ireq = inet_rsk(req);
+	ireq->loc_addr = daddr;
+	ireq->rmt_addr = saddr;
+	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
 		TCP_ECN_create_request(req, skb->h.th);
 
@@ -1523,7 +1529,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(sk, skb);
 	}
-	req->snt_isn = isn;
+	tcp_rsk(req)->snt_isn = isn;
 
 	if (tcp_v4_send_synack(sk, req, dst))
 		goto drop_and_free;
@@ -1551,6 +1557,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 				  struct open_request *req,
 				  struct dst_entry *dst)
 {
+	struct inet_request_sock *ireq;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
 	struct sock *newsk;
@@ -1570,11 +1577,12 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
-	newinet->daddr	      = req->af.v4_req.rmt_addr;
-	newinet->rcv_saddr    = req->af.v4_req.loc_addr;
-	newinet->saddr	      = req->af.v4_req.loc_addr;
-	newinet->opt	      = req->af.v4_req.opt;
-	req->af.v4_req.opt    = NULL;
+	ireq		      = inet_rsk(req);
+	newinet->daddr	      = ireq->rmt_addr;
+	newinet->rcv_saddr    = ireq->loc_addr;
+	newinet->saddr	      = ireq->loc_addr;
+	newinet->opt	      = ireq->opt;
+	ireq->opt	      = NULL;
 	newinet->mc_index     = tcp_v4_iif(skb);
 	newinet->mc_ttl	      = skb->nh.iph->ttl;
 	newtp->ext_header_len = 0;
@@ -2454,15 +2462,16 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
 static void get_openreq4(struct sock *sk, struct open_request *req,
 			 char *tmpbuf, int i, int uid)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	int ttd = req->expires - jiffies;
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
 		i,
-		req->af.v4_req.loc_addr,
+		ireq->loc_addr,
 		ntohs(inet_sk(sk)->sport),
-		req->af.v4_req.rmt_addr,
-		ntohs(req->rmt_port),
+		ireq->rmt_addr,
+		ntohs(ireq->rmt_port),
 		TCP_SYN_RECV,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
@@ -2618,6 +2627,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
+	.rsk_prot		= &or_ipv4,
 };
 
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index eea1a17a9ac2..1037401c7cc8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -692,6 +692,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 	struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
 
 	if(newsk != NULL) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct tcp_sock *newtp;
 		struct sk_filter *filter;
 
@@ -703,7 +705,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		tcp_sk(newsk)->bind_hash = NULL;
 
 		/* Clone the TCP header template */
-		inet_sk(newsk)->dport = req->rmt_port;
+		inet_sk(newsk)->dport = ireq->rmt_port;
 
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
@@ -739,14 +741,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		/* Now setup tcp_sock */
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_nxt = req->rcv_isn + 1;
-		newtp->snd_nxt = req->snt_isn + 1;
-		newtp->snd_una = req->snt_isn + 1;
-		newtp->snd_sml = req->snt_isn + 1;
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->snd_nxt = treq->snt_isn + 1;
+		newtp->snd_una = treq->snt_isn + 1;
+		newtp->snd_sml = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
-		tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);
+		tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
 
 		newtp->retransmits = 0;
 		newtp->backoff = 0;
@@ -775,10 +777,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		tcp_set_ca_state(newtp, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->rcv_wup = req->rcv_isn + 1;
-		newtp->write_seq = req->snt_isn + 1;
+		newtp->rcv_wup = treq->rcv_isn + 1;
+		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
-		newtp->copied_seq = req->rcv_isn + 1;
+		newtp->copied_seq = treq->rcv_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -808,18 +810,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newsk->sk_socket = NULL;
 		newsk->sk_sleep = NULL;
 
-		newtp->rx_opt.tstamp_ok = req->tstamp_ok;
-		if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
+		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
+		if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
 				newtp->rx_opt.sack_ok |= 2;
 		}
 		newtp->window_clamp = req->window_clamp;
 		newtp->rcv_ssthresh = req->rcv_wnd;
 		newtp->rcv_wnd = req->rcv_wnd;
-		newtp->rx_opt.wscale_ok = req->wscale_ok;
+		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
 		if (newtp->rx_opt.wscale_ok) {
-			newtp->rx_opt.snd_wscale = req->snd_wscale;
-			newtp->rx_opt.rcv_wscale = req->rcv_wscale;
+			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
+			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
 		} else {
 			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
 			newtp->window_clamp = min(newtp->window_clamp, 65535U);
@@ -881,7 +883,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	}
 
 	/* Check for pure retransmitted SYN. */
-	if (TCP_SKB_CB(skb)->seq == req->rcv_isn &&
+	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
 	    flg == TCP_FLAG_SYN &&
 	    !paws_reject) {
 		/*
@@ -959,7 +961,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1))
+	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
@@ -970,7 +972,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	/* RFC793: "first check sequence number". */
 
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-					  req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
+					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST))
 			req->class->send_ack(skb, req);
@@ -981,12 +983,12 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 
 	/* In sequence, PAWS is OK. */
 
-	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
 			req->ts_recent = tmp_opt.rcv_tsval;
 
-		if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
+		if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
 			/* Truncate SYN, it is out of window starting
-			   at req->rcv_isn+1. */
+			   at tcp_rsk(req)->rcv_isn + 1. */
 			flg &= ~TCP_FLAG_SYN;
 		}
 
@@ -1003,8 +1005,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			return NULL;
 
 		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-		if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
-			req->acked = 1;
+		if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+			inet_rsk(req)->acked = 1;
 			return NULL;
 		}
 
@@ -1026,7 +1028,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 
 	listen_overflow:
 		if (!sysctl_tcp_abort_on_overflow) {
-			req->acked = 1;
+			inet_rsk(req)->acked = 1;
 			return NULL;
 		}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fa24e7ae1f40..f3c8747caf91 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1358,6 +1358,7 @@ int tcp_send_synack(struct sock *sk)
 struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				 struct open_request *req)
 {
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
 	int tcp_header_size;
@@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	skb->dst = dst_clone(dst);
 
 	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
-			   (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
-			   (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
+			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
+			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
 			   /* SACK_PERM is in the place of NOP NOP of TS */
-			   ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
 	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
 	if (dst->dev->features&NETIF_F_TSO)
-		req->ecn_ok = 0;
+		ireq->ecn_ok = 0;
 	TCP_ECN_make_synack(req, th);
 	th->source = inet_sk(sk)->sport;
-	th->dest = req->rmt_port;
-	TCP_SKB_CB(skb)->seq = req->snt_isn;
+	th->dest = ireq->rmt_port;
+	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 	TCP_SKB_CB(skb)->sacked = 0;
 	skb_shinfo(skb)->tso_segs = 1;
 	skb_shinfo(skb)->tso_size = 0;
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
-	th->ack_seq = htonl(req->rcv_isn + 1);
+	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale; 
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk), 
-			dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 			&req->rcv_wnd,
 			&req->window_clamp,
-			req->wscale_ok,
+			ireq->wscale_ok,
 			&rcv_wscale);
-		req->rcv_wscale = rcv_wscale; 
+		ireq->rcv_wscale = rcv_wscale; 
 	}
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(req->rcv_wnd);
 
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
-			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
+	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
+			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
 			      TCP_SKB_CB(skb)->when,
 			      req->ts_recent);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 799ebe061e2c..ba30ca0aa6a3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -513,7 +513,7 @@ static void tcp_synack_timer(struct sock *sk)
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
 				if ((req->retrans < thresh ||
-				     (req->acked && req->retrans < max_retries))
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
 				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
 					unsigned long timeo;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f69e800a0ad..9199ad2fde0d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -407,11 +407,13 @@ static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		if (req->rmt_port == rport &&
+		const struct tcp6_request_sock *treq = tcp6_rsk(req);
+
+		if (inet_rsk(req)->rmt_port == rport &&
 		    req->class->family == AF_INET6 &&
-		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
-		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
-		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
+		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
+		    (!treq->iif || treq->iif == iif)) {
 			BUG_TRAP(req->sk == NULL);
 			*prevp = prev;
 			return req;
@@ -923,7 +925,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		 */
 		BUG_TRAP(req->sk == NULL);
 
-		if (seq != req->snt_isn) {
+		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
@@ -960,6 +962,7 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 			      struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
@@ -969,19 +972,19 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 	fl.fl6_flowlabel = 0;
-	fl.oif = req->af.v6_req.iif;
-	fl.fl_ip_dport = req->rmt_port;
+	fl.oif = treq->iif;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
 
 	if (dst == NULL) {
 		opt = np->opt;
 		if (opt == NULL &&
 		    np->rxopt.bits.srcrt == 2 &&
-		    req->af.v6_req.pktopts) {
-			struct sk_buff *pktopts = req->af.v6_req.pktopts;
+		    treq->pktopts) {
+			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
@@ -1008,10 +1011,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 		struct tcphdr *th = skb->h.th;
 
 		th->check = tcp_v6_check(th, skb->len,
-					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
+					 &treq->loc_addr, &treq->rmt_addr,
 					 csum_partial((char *)th, skb->len, skb->csum));
 
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
 		if (err == NET_XMIT_CN)
 			err = 0;
@@ -1026,12 +1029,13 @@ done:
 
 static void tcp_v6_or_free(struct open_request *req)
 {
-	if (req->af.v6_req.pktopts)
-		kfree_skb(req->af.v6_req.pktopts);
+	if (tcp6_rsk(req)->pktopts)
+		kfree_skb(tcp6_rsk(req)->pktopts);
 }
 
 static struct or_calltable or_ipv6 = {
 	.family		=	AF_INET6,
+	.obj_size	=	sizeof(struct tcp6_request_sock),
 	.rtx_syn_ack	=	tcp_v6_send_synack,
 	.send_ack	=	tcp_v6_or_send_ack,
 	.destructor	=	tcp_v6_or_free,
@@ -1221,7 +1225,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
 {
-	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
 }
 
 
@@ -1264,7 +1268,7 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_listen_opt *lopt = tp->listen_opt;
-	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
 	req->sk = NULL;
 	req->expires = jiffies + TCP_TIMEOUT_INIT;
@@ -1284,6 +1288,7 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1308,7 +1313,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
 		goto drop;
 
-	req = tcp_openreq_alloc();
+	req = tcp_openreq_alloc(&or_ipv6);
 	if (req == NULL)
 		goto drop;
 
@@ -1321,28 +1326,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	req->class = &or_ipv6;
-	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+	treq = tcp6_rsk(req);
+	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
-	req->af.v6_req.pktopts = NULL;
+	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo ||
 	    np->rxopt.bits.rxhlim) {
 		atomic_inc(&skb->users);
-		req->af.v6_req.pktopts = skb;
+		treq->pktopts = skb;
 	}
-	req->af.v6_req.iif = sk->sk_bound_dev_if;
+	treq->iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		req->af.v6_req.iif = tcp_v6_iif(skb);
+	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		treq->iif = tcp_v6_iif(skb);
 
 	if (isn == 0) 
 		isn = tcp_v6_init_sequence(sk,skb);
 
-	req->snt_isn = isn;
+	tcp_rsk(req)->snt_isn = isn;
 
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
@@ -1363,6 +1368,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct open_request *req,
 					  struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1426,10 +1432,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (np->rxopt.bits.srcrt == 2 &&
-	    opt == NULL && req->af.v6_req.pktopts) {
-		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
+	    opt == NULL && treq->pktopts) {
+		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -1438,16 +1444,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
 			ipv6_addr_copy(&final, &fl.fl6_dst);
 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 			final_p = &final;
 		}
-		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = req->rmt_port;
+		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1482,10 +1488,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
-	newsk->sk_bound_dev_if = req->af.v6_req.iif;
+	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
+	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
+	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options... 
 
@@ -1498,11 +1504,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (req->af.v6_req.pktopts) {
-		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
-					      GFP_ATOMIC);
-		kfree_skb(req->af.v6_req.pktopts);
-		req->af.v6_req.pktopts = NULL;
+	if (treq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+		kfree_skb(treq->pktopts);
+		treq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
@@ -2058,8 +2063,8 @@ static void get_openreq6(struct seq_file *seq,
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &req->af.v6_req.loc_addr;
-	dest = &req->af.v6_req.rmt_addr;
+	src = &tcp6_rsk(req)->loc_addr;
+	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -2069,7 +2074,7 @@ static void get_openreq6(struct seq_file *seq,
 		   ntohs(inet_sk(sk)->sport),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(req->rmt_port),
+		   ntohs(inet_rsk(req)->rmt_port),
 		   TCP_SYN_RECV,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */  
@@ -2239,6 +2244,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
+	.rsk_prot		= &or_ipv6,
 };
 
 static struct inet6_protocol tcpv6_protocol = {
-- 
cgit v1.2.3


From 0e87506fcc734647c7b2497eee4eb81e785c857a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:47:59 -0700
Subject: [NET] Generalise tcp_listen_opt

This chunks out the accept_queue and tcp_listen_opt code and moves
them to net/core/request_sock.c and include/net/request_sock.h, to
make it useful for other transport protocols, DCCP being the first one
to use it.

Next patches will rename tcp_listen_opt to accept_sock and remove the
inline tcp functions that just call a reqsk_queue_ function.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h        |  17 +----
 include/net/request_sock.h | 178 +++++++++++++++++++++++++++++++++++++++++++++
 include/net/tcp.h          |  46 ++----------
 net/core/Makefile          |   3 +-
 net/core/request_sock.c    |  48 ++++++++++++
 net/ipv4/tcp.c             |  67 ++++++-----------
 net/ipv4/tcp_diag.c        |   6 +-
 net/ipv4/tcp_ipv4.c        |  32 +++-----
 net/ipv4/tcp_minisocks.c   |   6 +-
 net/ipv4/tcp_timer.c       |  10 +--
 net/ipv6/tcp_ipv6.c        |  14 +---
 11 files changed, 281 insertions(+), 146 deletions(-)
 create mode 100644 net/core/request_sock.c

(limited to 'net/core')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fb54292a15aa..97a7c9e03df5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -379,22 +379,7 @@ struct tcp_sock {
 
 	__u32	total_retrans;	/* Total retransmits for entire connection */
 
-	/* The syn_wait_lock is necessary only to avoid proc interface having
-	 * to grab the main lock sock while browsing the listening hash
-	 * (otherwise it's deadlock prone).
-	 * This lock is acquired in read mode only from listening_get_next()
-	 * and it's acquired in write mode _only_ from code that is actively
-	 * changing the syn_wait_queue. All readers that are holding
-	 * the master sock lock don't need to grab this lock in read mode
-	 * too as the syn_wait_queue writes are always protected from
-	 * the main sock lock.
-	 */
-	rwlock_t		syn_wait_lock;
-	struct tcp_listen_opt	*listen_opt;
-
-	/* FIFO of established children */
-	struct request_sock	*accept_queue;
-	struct request_sock	*accept_queue_tail;
+	struct request_sock_queue accept_queue; /* FIFO of established children */
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 08a8fd1d1610..38943ed04e73 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -16,7 +16,9 @@
 #define _REQUEST_SOCK_H
 
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
+
 #include <net/sock.h>
 
 struct request_sock;
@@ -74,4 +76,180 @@ static inline void reqsk_free(struct request_sock *req)
 	__reqsk_free(req);
 }
 
+extern int sysctl_max_syn_backlog;
+
+/** struct tcp_listen_opt - listen state
+ *
+ * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
+ */
+struct tcp_listen_opt {
+	u8			max_qlen_log;
+	/* 3 bytes hole, try to use */
+	int			qlen;
+	int			qlen_young;
+	int			clock_hand;
+	u32			hash_rnd;
+	struct request_sock	*syn_table[0];
+};
+
+/** struct request_sock_queue - queue of request_socks
+ *
+ * @rskq_accept_head - FIFO head of established children
+ * @rskq_accept_tail - FIFO tail of established children
+ * @syn_wait_lock - serializer
+ *
+ * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
+ * lock sock while browsing the listening hash (otherwise it's deadlock prone).
+ *
+ * This lock is acquired in read mode only from listening_get_next() seq_file
+ * op and it's acquired in write mode _only_ from code that is actively
+ * changing rskq_accept_head. All readers that are holding the master sock lock
+ * don't need to grab this lock in read mode too as rskq_accept_head. writes
+ * are always protected from the main sock lock.
+ */
+struct request_sock_queue {
+	struct request_sock	*rskq_accept_head;
+	struct request_sock	*rskq_accept_tail;
+	rwlock_t		syn_wait_lock;
+	struct tcp_listen_opt	*listen_opt;
+};
+
+extern int reqsk_queue_alloc(struct request_sock_queue *queue,
+			     const int nr_table_entries);
+
+static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
+{
+	struct tcp_listen_opt *lopt;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	lopt = queue->listen_opt;
+	queue->listen_opt = NULL;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return lopt;
+}
+
+static inline void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	kfree(reqsk_queue_yank_listen_sk(queue));
+}
+
+static inline struct request_sock *
+	reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
+{
+	struct request_sock *req = queue->rskq_accept_head;
+
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+	return req;
+}
+
+static inline int reqsk_queue_empty(struct request_sock_queue *queue)
+{
+	return queue->rskq_accept_head == NULL;
+}
+
+static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
+				      struct request_sock *req,
+				      struct request_sock **prev_req)
+{
+	write_lock(&queue->syn_wait_lock);
+	*prev_req = req->dl_next;
+	write_unlock(&queue->syn_wait_lock);
+}
+
+static inline void reqsk_queue_add(struct request_sock_queue *queue,
+				   struct request_sock *req,
+				   struct sock *parent,
+				   struct sock *child)
+{
+	req->sk = child;
+	sk_acceptq_added(parent);
+
+	if (queue->rskq_accept_head == NULL)
+		queue->rskq_accept_head = req;
+	else
+		queue->rskq_accept_tail->dl_next = req;
+
+	queue->rskq_accept_tail = req;
+	req->dl_next = NULL;
+}
+
+static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue)
+{
+	struct request_sock *req = queue->rskq_accept_head;
+
+	BUG_TRAP(req != NULL);
+
+	queue->rskq_accept_head = req->dl_next;
+	if (queue->rskq_accept_head == NULL)
+		queue->rskq_accept_tail = NULL;
+
+	return req;
+}
+
+static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
+						 struct sock *parent)
+{
+	struct request_sock *req = reqsk_queue_remove(queue);
+	struct sock *child = req->sk;
+
+	BUG_TRAP(child != NULL);
+
+	sk_acceptq_removed(parent);
+	__reqsk_free(req);
+	return child;
+}
+
+static inline int reqsk_queue_removed(struct request_sock_queue *queue,
+				      struct request_sock *req)
+{
+	struct tcp_listen_opt *lopt = queue->listen_opt;
+
+	if (req->retrans == 0)
+		--lopt->qlen_young;
+
+	return --lopt->qlen;
+}
+
+static inline int reqsk_queue_added(struct request_sock_queue *queue)
+{
+	struct tcp_listen_opt *lopt = queue->listen_opt;
+	const int prev_qlen = lopt->qlen;
+
+	lopt->qlen_young++;
+	lopt->qlen++;
+	return prev_qlen;
+}
+
+static inline int reqsk_queue_len(struct request_sock_queue *queue)
+{
+	return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
+}
+
+static inline int reqsk_queue_len_young(struct request_sock_queue *queue)
+{
+	return queue->listen_opt->qlen_young;
+}
+
+static inline int reqsk_queue_is_full(struct request_sock_queue *queue)
+{
+	return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
+}
+
+static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
+					u32 hash, struct request_sock *req,
+					unsigned timeout)
+{
+	struct tcp_listen_opt *lopt = queue->listen_opt;
+
+	req->expires = jiffies + timeout;
+	req->retrans = 0;
+	req->sk = NULL;
+	req->dl_next = lopt->syn_table[hash];
+
+	write_lock(&queue->syn_wait_lock);
+	lopt->syn_table[hash] = req;
+	write_unlock(&queue->syn_wait_lock);
+}
+
 #endif /* _REQUEST_SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6663086a5e35..a2e323c54457 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1686,71 +1686,41 @@ static inline int tcp_full_space(const struct sock *sk)
 static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
 					 struct sock *child)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	req->sk = child;
-	sk_acceptq_added(sk);
-
-	if (!tp->accept_queue_tail) {
-		tp->accept_queue = req;
-	} else {
-		tp->accept_queue_tail->dl_next = req;
-	}
-	tp->accept_queue_tail = req;
-	req->dl_next = NULL;
+	reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
 }
 
-struct tcp_listen_opt
-{
-	u8			max_qlen_log;	/* log_2 of maximal queued SYNs */
-	int			qlen;
-	int			qlen_young;
-	int			clock_hand;
-	u32			hash_rnd;
-	struct request_sock	*syn_table[TCP_SYNQ_HSIZE];
-};
-
 static inline void
 tcp_synq_removed(struct sock *sk, struct request_sock *req)
 {
-	struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
-
-	if (--lopt->qlen == 0)
+	if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
 		tcp_delete_keepalive_timer(sk);
-	if (req->retrans == 0)
-		lopt->qlen_young--;
 }
 
 static inline void tcp_synq_added(struct sock *sk)
 {
-	struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
-
-	if (lopt->qlen++ == 0)
+	if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
 		tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
-	lopt->qlen_young++;
 }
 
 static inline int tcp_synq_len(struct sock *sk)
 {
-	return tcp_sk(sk)->listen_opt->qlen;
+	return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
 }
 
 static inline int tcp_synq_young(struct sock *sk)
 {
-	return tcp_sk(sk)->listen_opt->qlen_young;
+	return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
 }
 
 static inline int tcp_synq_is_full(struct sock *sk)
 {
-	return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log;
+	return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
 }
 
 static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
-				       struct request_sock **prev)
+				   struct request_sock **prev)
 {
-	write_lock(&tp->syn_wait_lock);
-	*prev = req->dl_next;
-	write_unlock(&tp->syn_wait_lock);
+	reqsk_queue_unlink(&tp->accept_queue, req, prev);
 }
 
 static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe2f..5e0c56b7f607 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
 # Makefile for the Linux networking core.
 #
 
-obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 000000000000..1258333ca007
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,48 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * 		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <net/request_sock.h>
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      const int nr_table_entries)
+{
+	const int lopt_size = sizeof(struct tcp_listen_opt) +
+			      nr_table_entries * sizeof(struct request_sock *);
+	struct tcp_listen_opt *lopt = kmalloc(lopt_size, GFP_KERNEL);
+
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	memset(lopt, 0, lopt_size);
+
+	for (lopt->max_qlen_log = 6;
+	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1c29feb6b35f..b85a46dd40a0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
 static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
 					       poll_table *wait)
 {
-	return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
+	return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
 }
 
 /*
@@ -462,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt;
+	int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
+
+	if (rc != 0)
+		return rc;
 
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
-	tp->accept_queue = tp->accept_queue_tail = NULL;
-	rwlock_init(&tp->syn_wait_lock);
 	tcp_delack_init(tp);
 
-	lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
-	if (!lopt)
-		return -ENOMEM;
-
-	memset(lopt, 0, sizeof(struct tcp_listen_opt));
-	for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
-		if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
-			break;
-	get_random_bytes(&lopt->hash_rnd, 4);
-
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = lopt;
-	write_unlock_bh(&tp->syn_wait_lock);
-
 	/* There is race window here: we announce ourselves listening,
 	 * but this transition is still not validated by get_port().
 	 * It is OK, because this socket enters to hash table only
@@ -500,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
 	}
 
 	sk->sk_state = TCP_CLOSE;
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = NULL;
-	write_unlock_bh(&tp->syn_wait_lock);
-	kfree(lopt);
+	reqsk_queue_destroy(&tp->accept_queue);
 	return -EADDRINUSE;
 }
 
@@ -515,18 +499,16 @@ int tcp_listen_start(struct sock *sk)
 static void tcp_listen_stop (struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	struct request_sock *acc_req = tp->accept_queue;
+	struct tcp_listen_opt *lopt;
+	struct request_sock *acc_req;
 	struct request_sock *req;
 	int i;
 
 	tcp_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = NULL;
-	write_unlock_bh(&tp->syn_wait_lock);
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+	lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
 
 	if (lopt->qlen) {
 		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
@@ -1867,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
 		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
-		if (!tp->accept_queue)
+		if (reqsk_queue_empty(&tp->accept_queue))
 			timeo = schedule_timeout(timeo);
 		lock_sock(sk);
 		err = 0;
-		if (tp->accept_queue)
+		if (!reqsk_queue_empty(&tp->accept_queue))
 			break;
 		err = -EINVAL;
 		if (sk->sk_state != TCP_LISTEN)
@@ -1894,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
 struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock *req;
 	struct sock *newsk;
 	int error;
 
@@ -1905,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 	 */
 	error = -EINVAL;
 	if (sk->sk_state != TCP_LISTEN)
-		goto out;
+		goto out_err;
 
 	/* Find already established connection */
-	if (!tp->accept_queue) {
+	if (reqsk_queue_empty(&tp->accept_queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
 		error = -EAGAIN;
 		if (!timeo)
-			goto out;
+			goto out_err;
 
 		error = wait_for_connect(sk, timeo);
 		if (error)
-			goto out;
+			goto out_err;
 	}
 
-	req = tp->accept_queue;
-	if ((tp->accept_queue = req->dl_next) == NULL)
-		tp->accept_queue_tail = NULL;
-
- 	newsk = req->sk;
-	sk_acceptq_removed(sk);
-	__reqsk_free(req);
+	newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
 	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
-	release_sock(sk);
-	return newsk;
-
 out:
 	release_sock(sk);
+	return newsk;
+out_err:
+	newsk = NULL;
 	*err = error;
-	return NULL;
+	goto out;
 }
 
 /*
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 67277800d0c1..c3328fa48837 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -529,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 	entry.family = sk->sk_family;
 
-	read_lock_bh(&tp->syn_wait_lock);
+	read_lock_bh(&tp->accept_queue.syn_wait_lock);
 
-	lopt = tp->listen_opt;
+	lopt = tp->accept_queue.listen_opt;
 	if (!lopt || !lopt->qlen)
 		goto out;
 
@@ -588,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	}
 
 out:
-	read_unlock_bh(&tp->syn_wait_lock);
+	read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 
 	return err;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 95528a75a63d..1745dc8d25e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -874,7 +874,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
 					      __u16 rport,
 					      __u32 raddr, __u32 laddr)
 {
-	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
 	struct request_sock *req, **prev;
 
 	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -898,18 +898,10 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
 static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
 	u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	req->retrans = 0;
-	req->sk = NULL;
-	req->dl_next = lopt->syn_table[h];
-
-	write_lock(&tp->syn_wait_lock);
-	lopt->syn_table[h] = req;
-	write_unlock(&tp->syn_wait_lock);
-
+	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
 	tcp_synq_added(sk);
 }
 
@@ -2167,17 +2159,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 			if (++st->sbucket >= TCP_SYNQ_HSIZE)
 				break;
 get_req:
-			req = tp->listen_opt->syn_table[st->sbucket];
+			req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
 		}
 		sk	  = sk_next(st->syn_wait_sk);
 		st->state = TCP_SEQ_STATE_LISTENING;
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 	} else {
 	       	tp = tcp_sk(sk);
-		read_lock_bh(&tp->syn_wait_lock);
-		if (tp->listen_opt && tp->listen_opt->qlen)
+		read_lock_bh(&tp->accept_queue.syn_wait_lock);
+		if (reqsk_queue_len(&tp->accept_queue))
 			goto start_req;
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 		sk = sk_next(sk);
 	}
 get_sk:
@@ -2187,8 +2179,8 @@ get_sk:
 			goto out;
 		}
 	       	tp = tcp_sk(sk);
-		read_lock_bh(&tp->syn_wait_lock);
-		if (tp->listen_opt && tp->listen_opt->qlen) {
+		read_lock_bh(&tp->accept_queue.syn_wait_lock);
+		if (reqsk_queue_len(&tp->accept_queue)) {
 start_req:
 			st->uid		= sock_i_uid(sk);
 			st->syn_wait_sk = sk;
@@ -2196,7 +2188,7 @@ start_req:
 			st->sbucket	= 0;
 			goto get_req;
 		}
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 	}
 	if (++st->bucket < TCP_LHTABLE_SIZE) {
 		sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2383,7 +2375,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_OPENREQ:
 		if (v) {
 			struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
-			read_unlock_bh(&tp->syn_wait_lock);
+			read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 		}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0e6d525a8341..b3943e7562f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -790,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->probes_out = 0;
 		newtp->rx_opt.num_sacks = 0;
 		newtp->urg_data = 0;
-		newtp->listen_opt = NULL;
-		newtp->accept_queue = newtp->accept_queue_tail = NULL;
-		/* Deinitialize syn_wait_lock to trap illegal accesses. */
-		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
+		/* Deinitialize accept_queue to trap illegal accesses. */
+		memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
 
 		/* Back to base struct sock members. */
 		newsk->sk_err = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f03efe5fb76a..d97d191149c1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,7 +464,7 @@ out_unlock:
 static void tcp_synack_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
 	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
 	int thresh = max_retries;
 	unsigned long now = jiffies;
@@ -527,12 +527,8 @@ static void tcp_synack_timer(struct sock *sk)
 				}
 
 				/* Drop this request */
-				write_lock(&tp->syn_wait_lock);
-				*reqp = req->dl_next;
-				write_unlock(&tp->syn_wait_lock);
-				lopt->qlen--;
-				if (req->retrans == 0)
-					lopt->qlen_young--;
+				tcp_synq_unlink(tp, req, reqp);
+				reqsk_queue_removed(&tp->accept_queue, req);
 				reqsk_free(req);
 				continue;
 			}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 068cd4a8c292..84091daad6b5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -401,7 +401,7 @@ static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
 					      struct in6_addr *laddr,
 					      int iif)
 {
-	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
 	struct request_sock *req, **prev;  
 
 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -1267,18 +1267,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
 	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
-	req->sk = NULL;
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	req->retrans = 0;
-	req->dl_next = lopt->syn_table[h];
-
-	write_lock(&tp->syn_wait_lock);
-	lopt->syn_table[h] = req;
-	write_unlock(&tp->syn_wait_lock);
-
+	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
 	tcp_synq_added(sk);
 }
 
-- 
cgit v1.2.3


From 2ad69c55a282315e6119cf7fd744f26a925bdfd2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:48:55 -0700
Subject: [NET] rename struct tcp_listen_opt to struct listen_sock

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 16 ++++++++--------
 net/core/request_sock.c    |  4 ++--
 net/ipv4/tcp.c             |  2 +-
 net/ipv4/tcp_diag.c        |  2 +-
 net/ipv4/tcp_ipv4.c        |  4 ++--
 net/ipv4/tcp_timer.c       |  2 +-
 net/ipv6/tcp_ipv6.c        |  4 ++--
 7 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net/core')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 38943ed04e73..72fd6f5e86b1 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -78,11 +78,11 @@ static inline void reqsk_free(struct request_sock *req)
 
 extern int sysctl_max_syn_backlog;
 
-/** struct tcp_listen_opt - listen state
+/** struct listen_sock - listen state
  *
  * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
  */
-struct tcp_listen_opt {
+struct listen_sock {
 	u8			max_qlen_log;
 	/* 3 bytes hole, try to use */
 	int			qlen;
@@ -111,15 +111,15 @@ struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
-	struct tcp_listen_opt	*listen_opt;
+	struct listen_sock	*listen_opt;
 };
 
 extern int reqsk_queue_alloc(struct request_sock_queue *queue,
 			     const int nr_table_entries);
 
-static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
+static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
 {
-	struct tcp_listen_opt *lopt;
+	struct listen_sock *lopt;
 
 	write_lock_bh(&queue->syn_wait_lock);
 	lopt = queue->listen_opt;
@@ -203,7 +203,7 @@ static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queu
 static inline int reqsk_queue_removed(struct request_sock_queue *queue,
 				      struct request_sock *req)
 {
-	struct tcp_listen_opt *lopt = queue->listen_opt;
+	struct listen_sock *lopt = queue->listen_opt;
 
 	if (req->retrans == 0)
 		--lopt->qlen_young;
@@ -213,7 +213,7 @@ static inline int reqsk_queue_removed(struct request_sock_queue *queue,
 
 static inline int reqsk_queue_added(struct request_sock_queue *queue)
 {
-	struct tcp_listen_opt *lopt = queue->listen_opt;
+	struct listen_sock *lopt = queue->listen_opt;
 	const int prev_qlen = lopt->qlen;
 
 	lopt->qlen_young++;
@@ -240,7 +240,7 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 					u32 hash, struct request_sock *req,
 					unsigned timeout)
 {
-	struct tcp_listen_opt *lopt = queue->listen_opt;
+	struct listen_sock *lopt = queue->listen_opt;
 
 	req->expires = jiffies + timeout;
 	req->retrans = 0;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 1258333ca007..78fd60a46bf2 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -21,9 +21,9 @@
 int reqsk_queue_alloc(struct request_sock_queue *queue,
 		      const int nr_table_entries)
 {
-	const int lopt_size = sizeof(struct tcp_listen_opt) +
+	const int lopt_size = sizeof(struct listen_sock) +
 			      nr_table_entries * sizeof(struct request_sock *);
-	struct tcp_listen_opt *lopt = kmalloc(lopt_size, GFP_KERNEL);
+	struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);
 
 	if (lopt == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b85a46dd40a0..3a4c52e77e01 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -499,7 +499,7 @@ int tcp_listen_start(struct sock *sk)
 static void tcp_listen_stop (struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt;
+	struct listen_sock *lopt;
 	struct request_sock *acc_req;
 	struct request_sock *req;
 	int i;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index c3328fa48837..634befc07921 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -514,7 +514,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	struct tcpdiag_entry entry;
 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt;
+	struct listen_sock *lopt;
 	struct rtattr *bc = NULL;
 	struct inet_sock *inet = inet_sk(sk);
 	int j, s_j;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1745dc8d25e6..485ca9cb1707 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -874,7 +874,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
 					      __u16 rport,
 					      __u32 raddr, __u32 laddr)
 {
-	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
 	struct request_sock *req, **prev;
 
 	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -898,7 +898,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
 static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
 	u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
 	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d97d191149c1..b127b4498565 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,7 +464,7 @@ out_unlock:
 static void tcp_synack_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
 	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
 	int thresh = max_retries;
 	unsigned long now = jiffies;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 84091daad6b5..2414937f2a83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -401,7 +401,7 @@ static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
 					      struct in6_addr *laddr,
 					      int iif)
 {
-	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
 	struct request_sock *req, **prev;  
 
 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -1267,7 +1267,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
 	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
 	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
-- 
cgit v1.2.3


From e52c1f17e4ea8e61bd26eb25f1a184202693c2b9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 18 Jun 2005 22:49:40 -0700
Subject: [NET]: Move sysctl_max_syn_backlog into request_sock.c

This fixes the CONFIG_INET=n build failure noticed
by Andrew Morton.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  1 -
 net/core/request_sock.c | 16 ++++++++++++++++
 net/ipv4/tcp_ipv4.c     | 16 ----------------
 3 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'net/core')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a2e323c54457..f730935b824a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -564,7 +564,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
 #define TCP_NAGLE_PUSH		4	/* Cork is overriden for already queued data */
 
 /* sysctl variables for tcp */
-extern int sysctl_max_syn_backlog;
 extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
 extern int sysctl_tcp_sack;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 78fd60a46bf2..bb55675f0685 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -18,6 +18,22 @@
 
 #include <net/request_sock.h>
 
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
 int reqsk_queue_alloc(struct request_sock_queue *queue,
 		      const int nr_table_entries)
 {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 485ca9cb1707..2d41d5d6ad19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1366,21 +1366,6 @@ static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
 	return dopt;
 }
 
-/*
- * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
- * One SYN_RECV socket costs about 80bytes on a 32bit machine.
- * It would be better to replace it with a global counter for all sockets
- * but then some measure against one socket starving all other sockets
- * would be needed.
- *
- * It was 128 by default. Experiments with real servers show, that
- * it is absolutely not enough even at 100conn/sec. 256 cures most
- * of problems. This value is adjusted to 128 for very small machines
- * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
- * Further increasing requires to change hash table size.
- */
-int sysctl_max_syn_backlog = 256;
-
 struct request_sock_ops tcp_request_sock_ops = {
 	.family		=	PF_INET,
 	.obj_size	=	sizeof(struct tcp_request_sock),
@@ -2662,7 +2647,6 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_local_port_range);
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
 
-- 
cgit v1.2.3


From c7fb64db001f83ece669c76a02d8ec2fdb1dd307 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:50:55 -0700
Subject: [NETLINK]: Neighbour table configuration and statistics via rtnetlink

To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
NLM_F_DUMP flag set. Every neighbour table configuration is
spread over multiple messages to avoid running into message
size limits on systems with many interfaces. The first message
in the sequence transports all not device specific data such as
statistics, configuration, and the default parameter set.
This message is followed by 0..n messages carrying device
specific parameter sets.

Although the ordering should be sufficient, NDTA_NAME can be
used to identify sequences. The initial message can be identified
by checking for NDTA_CONFIG. The device specific messages do
not contain this TLV but have NDTPA_IFINDEX set to the
corresponding interface index.

To change neighbour table attributes, send RTM_SETNEIGHTBL
with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
otherwise. Device specific parameter sets can be changed by
setting NDTPA_IFINDEX to the interface index of the corresponding
device.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h   | 107 +++++++++++++++
 include/net/neighbour.h     |   4 +
 net/core/neighbour.c        | 317 +++++++++++++++++++++++++++++++++++++++++++-
 net/core/rtnetlink.c        |  20 +--
 security/selinux/nlmsgtab.c |   2 +
 5 files changed, 439 insertions(+), 11 deletions(-)

(limited to 'net/core')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a09b5d42babf..5a5cda160267 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,6 +89,13 @@ enum {
 	RTM_GETANYCAST	= 62,
 #define RTM_GETANYCAST	RTM_GETANYCAST
 
+	RTM_NEWNEIGHTBL	= 64,
+#define RTM_NEWNEIGHTBL	RTM_NEWNEIGHTBL
+	RTM_GETNEIGHTBL	= 66,
+#define RTM_GETNEIGHTBL	RTM_GETNEIGHTBL
+	RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL	RTM_SETNEIGHTBL
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -493,6 +500,106 @@ struct nda_cacheinfo
 	__u32		ndm_refcnt;
 };
 
+
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,			/* char *, unchangeable */
+	NDTA_THRESH1,			/* u32 */
+	NDTA_THRESH2,			/* u32 */
+	NDTA_THRESH3,			/* u32 */
+	NDTA_CONFIG,			/* struct ndt_config, read-only */
+	NDTA_PARMS,			/* nested TLV NDTPA_* */
+	NDTA_STATS,			/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,		/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
+		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
+#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
+
+
 /****
  *		General form of address family dependent message.
  ****/
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4f33bbc21e7f..17191ac9be70 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -65,6 +65,7 @@ struct neighbour;
 
 struct neigh_parms
 {
+	struct net_device *dev;
 	struct neigh_parms *next;
 	int	(*neigh_setup)(struct neighbour *);
 	struct neigh_table *tbl;
@@ -252,6 +253,9 @@ extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern void neigh_app_ns(struct neighbour *n);
 
+extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
+extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
+
 extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
 extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
 extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 43bdc521e20d..0841ac78c67d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1276,9 +1276,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 		INIT_RCU_HEAD(&p->rcu_head);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
-		if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-			kfree(p);
-			return NULL;
+		if (dev) {
+			if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+				kfree(p);
+				return NULL;
+			}
+
+			dev_hold(dev);
+			p->dev = dev;
 		}
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
@@ -1309,6 +1314,8 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 			*p = parms->next;
 			parms->dead = 1;
 			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 			return;
 		}
@@ -1546,6 +1553,308 @@ out:
 	return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct rtattr *nest = RTA_NEST(skb, NDTA_PARMS);
+
+	if (parms->dev)
+		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return RTA_NEST_END(skb, nest);
+
+rtattr_failure:
+	return RTA_NEST_CANCEL(skb, nest);
+}
+
+static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
+			      struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
+	ndtmsg = NLMSG_DATA(nlh);
+
+	NLMSG_SET_MULTIPART(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+
+		struct ndt_config ndc = {
+			.ndtc_key_len		= tbl->key_len,
+			.ndtc_entry_size	= tbl->entry_size,
+			.ndtc_entries		= atomic_read(&tbl->entries),
+			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
+			.ndtc_hash_rnd		= tbl->hash_rnd,
+			.ndtc_hash_mask		= tbl->hash_mask,
+			.ndtc_hash_chain_gc	= tbl->hash_chain_gc,
+			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
+		};
+
+		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			struct neigh_statistics	*st;
+
+			if (!cpu_possible(cpu))
+				continue;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs		+= st->allocs;
+			ndst.ndts_destroys		+= st->destroys;
+			ndst.ndts_hash_grows		+= st->hash_grows;
+			ndst.ndts_res_failed		+= st->res_failed;
+			ndst.ndts_lookups		+= st->lookups;
+			ndst.ndts_hits			+= st->hits;
+			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
+		}
+
+		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+ 
+nlmsg_failure:
+	return -1;
+}
+
+static int neightbl_fill_param_info(struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
+	ndtmsg = NLMSG_DATA(nlh);
+
+	NLMSG_SET_MULTIPART(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+
+	if (neightbl_fill_parms(skb, parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+ 
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      int ifindex)
+{
+	struct neigh_parms *p;
+	
+	for (p = &tbl->parms; p; p = p->next)
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+
+	return NULL;
+}
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
+	struct rtattr **tb = arg;
+	int err = -EINVAL;
+
+	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
+		return -EINVAL;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout;
+	}
+
+	/* 
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_THRESH1 - 1])
+		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
+
+	if (tb[NDTA_THRESH2 - 1])
+		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
+
+	if (tb[NDTA_THRESH3 - 1])
+		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
+
+	if (tb[NDTA_GC_INTERVAL - 1])
+		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
+
+	if (tb[NDTA_PARMS - 1]) {
+		struct rtattr *tbp[NDTPA_MAX];
+		struct neigh_parms *p;
+		u32 ifindex = 0;
+
+		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
+			goto rtattr_failure;
+
+		if (tbp[NDTPA_IFINDEX - 1])
+			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+
+		p = lookup_neigh_params(tbl, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto rtattr_failure;
+		}
+	
+		if (tbp[NDTPA_QUEUE_LEN - 1])
+			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
+
+		if (tbp[NDTPA_PROXY_QLEN - 1])
+			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+
+		if (tbp[NDTPA_APP_PROBES - 1])
+			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
+
+		if (tbp[NDTPA_UCAST_PROBES - 1])
+			p->ucast_probes =
+			   RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_MCAST_PROBES - 1])
+			p->mcast_probes =
+			   RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
+			p->base_reachable_time =
+			   RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+
+		if (tbp[NDTPA_GC_STALETIME - 1])
+			p->gc_staletime =
+			   RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+
+		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
+			p->delay_probe_time =
+			   RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+
+		if (tbp[NDTPA_RETRANS_TIME - 1])
+			p->retrans_time =
+			   RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+
+		if (tbp[NDTPA_ANYCAST_DELAY - 1])
+			p->anycast_delay =
+			   RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+
+		if (tbp[NDTPA_PROXY_DELAY - 1])
+			p->proxy_delay =
+			   RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+
+		if (tbp[NDTPA_LOCKTIME - 1])
+			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+	}
+
+	err = 0;
+
+rtattr_failure:
+	write_unlock_bh(&tbl->lock);
+errout:
+	read_unlock(&neigh_tbl_lock);
+	return err;
+}
+
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, family;
+	int s_idx = cb->args[0];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+		struct neigh_parms *p;
+
+		if (idx < s_idx || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+			break;
+
+		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
+			if (idx < s_idx)
+				continue;
+
+			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+				goto out;
+		}
+
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
 			   u32 pid, u32 seq, int event)
@@ -2352,6 +2661,8 @@ EXPORT_SYMBOL(neigh_update);
 EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(neightbl_dump_info);
+EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00caf4b318b2..56a20f014b8a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -100,6 +100,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -113,6 +114,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -649,14 +651,16 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	[RTM_GETLINK  - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
-	[RTM_SETLINK  - RTM_BASE] = { .doit   = do_setlink	      },
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
-	[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
-	[RTM_NEWNEIGH - RTM_BASE] = { .doit   = neigh_add	      },
-	[RTM_DELNEIGH - RTM_BASE] = { .doit   = neigh_delete	      },
-	[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info	      },
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_GETLINK     - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
+	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink		 },
+	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
+	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
+	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
+	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index f0fb6d76f7c5..92b057becb4b 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -63,6 +63,8 @@ static struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_GETPREFIX,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_GETMULTICAST,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_GETANYCAST,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_GETNEIGHTBL,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_SETNEIGHTBL,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 };
 
 static struct nlmsg_perm nlmsg_firewall_perms[] =
-- 
cgit v1.2.3


From 4b6ea82dd18c97598c3caaa8d0b1feec87857e70 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:51:43 -0700
Subject: [NETLINK]: Kill bogus NLMSG_SET_MULTIPART uses.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/core')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0841ac78c67d..d1f8f7847f7c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1589,9 +1589,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
 	struct ndtmsg *ndtmsg;
 
 	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
-	ndtmsg = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags |= NLM_F_MULTI;
 
-	NLMSG_SET_MULTIPART(nlh);
+	ndtmsg = NLMSG_DATA(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
@@ -1674,9 +1674,9 @@ static int neightbl_fill_param_info(struct neigh_table *tbl,
 	struct nlmsghdr *nlh;
 
 	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
-	ndtmsg = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags |= NLM_F_MULTI;
 
-	NLMSG_SET_MULTIPART(nlh);
+	ndtmsg = NLMSG_DATA(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
-- 
cgit v1.2.3


From e386c6eb431ca2e435d0202ad6997f3d2ccab2ce Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:52:09 -0700
Subject: [NEIGH]: Fix use of uninitialized variable when trimming in
 neightbl_fill_parms

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f8f7847f7c..2296a145fb78 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1555,7 +1555,9 @@ out:
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-	struct rtattr *nest = RTA_NEST(skb, NDTA_PARMS);
+	struct rtattr *nest = NULL;
+	
+	nest = RTA_NEST(skb, NDTA_PARMS);
 
 	if (parms->dev)
 		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
-- 
cgit v1.2.3


From 1797754ea7ee5e0d859b0a32506ff999f8d5fb71 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:53:48 -0700
Subject: [NETLINK]: Introduce NLMSG_NEW macro to better handle netlink flags

Introduces a new macro NLMSG_NEW which extends NLMSG_PUT but takes
a flags argument. NLMSG_PUT stays there for compatibility but now
calls NLMSG_NEW with flags == 0. NLMSG_PUT_ANSWER is renamed to
NLMSG_NEW_ANSWER which now also takes a flags argument.

Also converts the users of NLMSG_PUT_ANSWER to use NLMSG_NEW_ANSWER
and fixes the two direct users of __nlmsg_put to either provide
the flags or use NLMSG_NEW(_ANSWER).

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 17 ++++++++++-------
 net/core/neighbour.c     |  8 ++++----
 net/netlink/af_netlink.c |  8 +++++---
 3 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'net/core')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 8d1cb419a930..e38407a23d04 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -156,7 +156,7 @@ struct netlink_notify
 };
 
 static __inline__ struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
+__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 {
 	struct nlmsghdr *nlh;
 	int size = NLMSG_LENGTH(len);
@@ -164,20 +164,23 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
 	nlh->nlmsg_type = type;
 	nlh->nlmsg_len = size;
-	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_flags = flags;
 	nlh->nlmsg_pid = pid;
 	nlh->nlmsg_seq = seq;
 	return nlh;
 }
 
-#define NLMSG_PUT(skb, pid, seq, type, len) \
+#define NLMSG_NEW(skb, pid, seq, type, len, flags) \
 ({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \
 		goto nlmsg_failure; \
-	__nlmsg_put(skb, pid, seq, type, len); })
+	__nlmsg_put(skb, pid, seq, type, len, flags); })
+
+#define NLMSG_PUT(skb, pid, seq, type, len) \
+	NLMSG_NEW(skb, pid, seq, type, len, 0)
 
-#define NLMSG_PUT_ANSWER(skb, cb, type, len) \
-	NLMSG_PUT(skb, NETLINK_CB((cb)->skb).pid, \
-		  (cb)->nlh->nlmsg_seq, type, len)
+#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \
+	NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \
+		  (cb)->nlh->nlmsg_seq, type, len, flags)
 
 #define NLMSG_END(skb, nlh) \
 ({	(nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2296a145fb78..0fb742e228cc 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1590,8 +1590,8 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
 	struct nlmsghdr *nlh;
 	struct ndtmsg *ndtmsg;
 
-	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
-	nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
 
 	ndtmsg = NLMSG_DATA(nlh);
 
@@ -1675,8 +1675,8 @@ static int neightbl_fill_param_info(struct neigh_table *tbl,
 	struct ndtmsg *ndtmsg;
 	struct nlmsghdr *nlh;
 
-	nlh = NLMSG_PUT_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg));
-	nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
 
 	ndtmsg = NLMSG_DATA(nlh);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e41ce458c2a9..70bcd4744d93 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1095,8 +1095,7 @@ static int netlink_dump(struct sock *sk)
 		return 0;
 	}
 
-	nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int));
-	nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
 	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
@@ -1107,6 +1106,9 @@ static int netlink_dump(struct sock *sk)
 
 	netlink_destroy_callback(cb);
 	return 0;
+
+nlmsg_failure:
+	return -ENOBUFS;
 }
 
 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1178,7 +1180,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	}
 
 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
-			  NLMSG_ERROR, sizeof(struct nlmsgerr));
+			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
 	errmsg = NLMSG_DATA(rep);
 	errmsg->error = err;
 	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
-- 
cgit v1.2.3


From b6544c0b4cf2bd96195f3cdb7cebfb35090fc557 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Sat, 18 Jun 2005 22:54:12 -0700
Subject: [NETLINK]: Correctly set NLM_F_MULTI without checking the pid

This patch rectifies some rtnetlink message builders that derive the
flags from the pid. It is now explicit like the other cases
which get it right. Also fixes half a dozen dumpers which did not
set NLM_F_MULTI at all.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c     | 14 +++++++-------
 net/core/rtnetlink.c     | 13 ++++++++-----
 net/decnet/dn_dev.c      |  9 +++++----
 net/decnet/dn_route.c    | 11 ++++++-----
 net/decnet/dn_rules.c    |  7 ++++---
 net/decnet/dn_table.c    |  8 ++++----
 net/ipv4/devinet.c       |  9 ++++-----
 net/ipv4/fib_hash.c      |  3 ++-
 net/ipv4/fib_lookup.h    |  3 ++-
 net/ipv4/fib_rules.c     |  7 ++++---
 net/ipv4/fib_semantics.c |  6 +++---
 net/ipv4/route.c         | 11 +++++------
 net/ipv6/addrconf.c      | 49 +++++++++++++++++++++++-------------------------
 net/ipv6/route.c         | 11 ++++++-----
 14 files changed, 83 insertions(+), 78 deletions(-)

(limited to 'net/core')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0fb742e228cc..f6bdcad47da6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1859,18 +1859,17 @@ out:
 }
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event)
+			   u32 pid, u32 seq, int event, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
 	int locked = 0;
 	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-					 sizeof(struct ndmsg));
+	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
+					 sizeof(struct ndmsg), flags);
 	struct ndmsg *ndm = NLMSG_DATA(nlh);
 
-	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
 	ndm->ndm_family	 = n->ops->family;
 	ndm->ndm_flags	 = n->flags;
 	ndm->ndm_type	 = n->type;
@@ -1920,7 +1919,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				continue;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
-					    RTM_NEWNEIGH) <= 0) {
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI) <= 0) {
 				read_unlock_bh(&tbl->lock);
 				rc = -1;
 				goto out;
@@ -2329,7 +2329,7 @@ void neigh_app_ns(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2348,7 +2348,7 @@ static void neigh_app_notify(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 56a20f014b8a..63bd88665182 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -178,14 +178,14 @@ rtattr_failure:
 
 
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change)
+				 int type, u32 pid, u32 seq, u32 change, 
+				 unsigned int flags)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_UNSPEC;
 	r->ifi_type = dev->type;
@@ -275,7 +275,10 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0,
+					  NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -449,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ee7bf46eb78a..00233ecbc9cb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -716,13 +716,13 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a
 }
 
 static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 
 	ifm->ifa_family = AF_DECnet;
@@ -755,7 +755,7 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
 		return;
 	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
 		return;
@@ -790,7 +790,8 @@ static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			if (dn_dev_fill_ifaddr(skb, ifa,
 					       NETLINK_CB(cb->skb).pid,
 					       cb->nlh->nlmsg_seq,
-					       RTM_NEWADDR) <= 0)
+					       RTM_NEWADDR,
+					       NLM_F_MULTI) <= 0)
 				goto done;
 		}
 	}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1e7b5c3ea215..2399fa8a3f86 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1465,7 +1465,8 @@ int dn_route_input(struct sk_buff *skb)
 	return dn_route_input_slow(skb);
 }
 
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
+static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+			   int event, int nowait, unsigned int flags)
 {
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
@@ -1473,9 +1474,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int
 	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family = AF_DECnet;
 	r->rtm_dst_len = 16;
 	r->rtm_src_len = 0;
@@ -1596,7 +1596,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
-	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0);
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
 
 	if (err == 0)
 		goto out_free;
@@ -1644,7 +1644,8 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
+					cb->nlh->nlmsg_seq, RTM_NEWROUTE, 
+					1, NLM_F_MULTI) <= 0) {
 				dst_release(xchg(&skb->dst, NULL));
 				rcu_read_unlock_bh();
 				goto done;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 597587d170d8..1060de70bc0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -342,14 +342,15 @@ static struct notifier_block dn_fib_rules_notifier = {
 	.notifier_call =	dn_fib_rules_event,
 };
 
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb)
+static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
+			    struct netlink_callback *cb, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -394,7 +395,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (dn_fib_fill_rule(skb, r, cb) < 0)
+		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&dn_fib_rules_lock);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index dad5603912be..28ba5777a25a 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -270,13 +270,13 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
                         u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
-                        struct dn_fib_info *fi)
+                        struct dn_fib_info *fi, unsigned int flags)
 {
         struct rtmsg *rtm;
         struct nlmsghdr *nlh;
         unsigned char *b = skb->tail;
 
-        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
         rtm = NLMSG_DATA(nlh);
         rtm->rtm_family = AF_DECnet;
         rtm->rtm_dst_len = dst_len;
@@ -345,7 +345,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
 
         if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 
                                 f->fn_type, f->fn_scope, &f->fn_key, z, 
-                                DN_FIB_INFO(f)) < 0) {
+                                DN_FIB_INFO(f), 0) < 0) {
                 kfree_skb(skb);
                 return;
         }
@@ -377,7 +377,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				tb->n, 
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order, 
-				f->fn_info) < 0) {
+				f->fn_info, NLM_F_MULTI) < 0) {
 			cb->args[3] = i;
 			return -1;
 		}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 478a30179a52..fd47a1e890fa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1030,14 +1030,13 @@ static struct notifier_block ip_netdev_notifier = {
 };
 
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
-			    u32 pid, u32 seq, int event)
+			    u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
@@ -1090,7 +1089,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR) <= 0) {
+					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
 				rcu_read_unlock();
 				goto done;
 			}
@@ -1113,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 
 	if (!skb)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
 	} else {
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 6506dcc01b46..b10d6bb5ef3d 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -703,7 +703,8 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 					  &f->fn_key,
 					  fz->fz_order,
 					  fa->fa_tos,
-					  fa->fa_info) < 0) {
+					  fa->fa_info,
+					  NLM_F_MULTI) < 0) {
 				cb->args[3] = i;
 				return -1;
 			}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ac4485f75e97..b729d97cfa93 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
 			struct kern_rta *rta, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u8 tb_id, u8 type, u8 scope, void *dst,
-			 int dst_len, u8 tos, struct fib_info *fi);
+			 int dst_len, u8 tos, struct fib_info *fi,
+			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 		      int z, int tb_id,
 		      struct nlmsghdr *n, struct netlink_skb_parms *req);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 39d0aadb9a2a..0b298bbc1518 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -367,13 +367,14 @@ static struct notifier_block fib_rules_notifier = {
 
 static __inline__ int inet_fill_rule(struct sk_buff *skb,
 				     struct fib_rule *r,
-				     struct netlink_callback *cb)
+				     struct netlink_callback *cb,
+				     unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -422,7 +423,7 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (inet_fill_rule(skb, r, cb) < 0)
+		if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&fib_rules_lock);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 029362d66135..a9a44b4bef46 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
 			  fa->fa_type, fa->fa_scope, &key, z,
 			  fa->fa_tos,
-			  fa->fa_info) < 0) {
+			  fa->fa_info, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -932,13 +932,13 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 int
 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi)
+	      struct fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a682d28e247b..f4d53c919869 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2581,7 +2581,7 @@ int ip_route_output_key(struct rtable **rp, struct flowi *flp)
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			int nowait)
+			int nowait, unsigned int flags)
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
@@ -2591,9 +2591,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 #ifdef CONFIG_IP_MROUTE
 	struct rtattr *eptr;
 #endif
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
@@ -2744,7 +2743,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
-				RTM_NEWROUTE, 0);
+				RTM_NEWROUTE, 0, 0);
 	if (!err)
 		goto out_free;
 	if (err < 0) {
@@ -2781,8 +2780,8 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					 cb->nlh->nlmsg_seq,
-					 RTM_NEWROUTE, 1) <= 0) {
+					 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 
+					 1, NLM_F_MULTI) <= 0) {
 				dst_release(xchg(&skb->dst, NULL));
 				rcu_read_unlock_bh();
 				goto done;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2720899d516c..cdd19c54c03f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2622,15 +2622,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 }
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-			     u32 pid, u32 seq, int event)
+			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;
 	ifm->ifa_prefixlen = ifa->prefix_len;
@@ -2672,15 +2671,14 @@ rtattr_failure:
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;	
 	ifm->ifa_prefixlen = 128;
@@ -2709,15 +2707,14 @@ rtattr_failure:
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;	
 	ifm->ifa_prefixlen = 128;
@@ -2786,7 +2783,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 					continue;
 				if ((err = inet6_fill_ifaddr(skb, ifa, 
 				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+				    cb->nlh->nlmsg_seq, RTM_NEWADDR,
+				    NLM_F_MULTI)) <= 0)
 					goto done;
 			}
 			/* temp addr */
@@ -2797,7 +2795,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 					continue;
 				if ((err = inet6_fill_ifaddr(skb, ifa, 
 				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 
+				    cb->nlh->nlmsg_seq, RTM_NEWADDR,
+				    NLM_F_MULTI)) <= 0) 
 					goto done;
 			}
 #endif
@@ -2810,7 +2809,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 					continue;
 				if ((err = inet6_fill_ifmcaddr(skb, ifmca, 
 				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0)
+				    cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
+				    NLM_F_MULTI)) <= 0)
 					goto done;
 			}
 			break;
@@ -2822,7 +2822,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 					continue;
 				if ((err = inet6_fill_ifacaddr(skb, ifaca, 
 				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) 
+				    cb->nlh->nlmsg_seq, RTM_GETANYCAST,
+				    NLM_F_MULTI)) <= 0) 
 					goto done;
 			}
 			break;
@@ -2872,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
 		return;
@@ -2907,7 +2908,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 }
 
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 
-			     u32 pid, u32 seq, int event)
+			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device	*dev = idev->dev;
 	__s32			*array = NULL;
@@ -2918,8 +2919,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	__u32			mtu = dev->mtu;
 	struct ifla_cacheinfo	ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_INET6;
 	r->ifi_type = dev->type;
@@ -2986,7 +2986,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 		if ((idev = in6_dev_get(dev)) == NULL)
 			continue;
 		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 
-				cb->nlh->nlmsg_seq, RTM_NEWLINK);
+				cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
 		in6_dev_put(idev);
 		if (err <= 0)
 			break;
@@ -3008,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) {
+	if (inet6_fill_ifinfo(skb, idev, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
 		return;
@@ -3018,18 +3018,15 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 }
 
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-			struct prefix_info *pinfo, u32 pid, u32 seq, int event)
+			struct prefix_info *pinfo, u32 pid, u32 seq, 
+			int event, unsigned int flags)
 {
 	struct prefixmsg	*pmsg;
 	struct nlmsghdr 	*nlh;
 	unsigned char		*b = skb->tail;
 	struct prefix_cacheinfo	ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg));
-	
-	if (pid) 
-		nlh->nlmsg_flags |= NLM_F_MULTI;
-	
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
 	pmsg = NLMSG_DATA(nlh);
 	pmsg->prefix_family = AF_INET6;
 	pmsg->prefix_ifindex = idev->dev->ifindex;
@@ -3068,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) {
+	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
 		return;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3bf8a0254f81..1f5b226c3573 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1570,7 +1570,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *src,
 			 int iif,
 			 int type, u32 pid, u32 seq,
-			 struct nlmsghdr *in_nlh, int prefix)
+			 struct nlmsghdr *in_nlh, int prefix,
+			 unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr  *nlh;
@@ -1588,7 +1589,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		pid = in_nlh->nlmsg_pid;
 	}
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1674,7 +1675,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
-		     NULL, prefix);
+		     NULL, prefix, NLM_F_MULTI);
 }
 
 static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1822,7 +1823,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 			    &fl.fl6_dst, &fl.fl6_src,
 			    iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
-			    nlh->nlmsg_seq, nlh, 0);
+			    nlh->nlmsg_seq, nlh, 0, 0);
 	if (err < 0) {
 		err = -EMSGSIZE;
 		goto out_free;
@@ -1848,7 +1849,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
 		return;
 	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
+	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
 		return;
-- 
cgit v1.2.3


From 9ed19f339e12e731986de84134ac293cd15910a7 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Sat, 18 Jun 2005 22:55:51 -0700
Subject: [NETLINK]: Set correct pid for ioctl originating netlink events

This patch ensures that netlink events created as a result of programns
using ioctls (such as ifconfig, route etc) contains the correct PID of
those events.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c     | 2 +-
 net/ipv4/devinet.c       | 2 +-
 net/ipv4/fib_semantics.c | 4 ++--
 net/ipv6/addrconf.c      | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net/core')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 63bd88665182..e013d836a7ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -452,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fd47a1e890fa..650dcb12d9a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1112,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 
 	if (!skb)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
 	} else {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a9a44b4bef46..c886b28ba9f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -276,7 +276,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : 0;
+	u32 pid = req ? req->pid : n->nlmsg_pid;
 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
 
 	skb = alloc_skb(size, GFP_KERNEL);
@@ -1035,7 +1035,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
 	}
 
 	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
+	nl->nlmsg_pid = current->pid;
 	nl->nlmsg_seq = 0;
 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
 	if (cmd == SIOCDELRT) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4d4cb46f0439..47a30c3188ea 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2873,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
 		return;
@@ -3008,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifinfo(skb, idev, 0, 0, event, 0) < 0) {
+	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
 		return;
@@ -3065,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0) < 0) {
+	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
 		return;
-- 
cgit v1.2.3