summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-03-31 12:52:30 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-31 12:52:30 -0800
commit4b75679f60d0ce780609cbff249769b669f4fb69 (patch)
tree2c8890020b5e7d340036acb01f73a6e53feb038d
parent30c14e40ed85469f166b5effdab6705c73c5cd5e (diff)
parent025be81e83043f20538dcced1e12c5f8d152fbdb (diff)
downloadlwn-4b75679f60d0ce780609cbff249769b669f4fb69.tar.gz
lwn-4b75679f60d0ce780609cbff249769b669f4fb69.zip
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: [NET]: Allow skb headroom to be overridden [TCP]: Kill unused extern decl for tcp_v4_hash_connecting() [NET]: add SO_RCVBUF comment [NET]: Deinline some larger functions from netdevice.h [DCCP]: Use NULL for pointers, comfort sparse. [DECNET]: Fix refcount
-rw-r--r--include/asm-powerpc/system.h5
-rw-r--r--include/linux/netdevice.h55
-rw-r--r--include/linux/skbuff.h29
-rw-r--r--include/net/tcp.h3
-rw-r--r--net/core/dev.c64
-rw-r--r--net/core/sock.c16
-rw-r--r--net/dccp/feat.c6
-rw-r--r--net/decnet/dn_dev.c2
8 files changed, 117 insertions, 63 deletions
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 65f5a7b2646b..d075725bf444 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
* powers of 2 writes until it reaches sufficient alignment).
*
* Based on this we disable the IP header alignment in network drivers.
+ * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
+ * cacheline alignment of buffers.
*/
-#define NET_IP_ALIGN 0
+#define NET_IP_ALIGN 0
+#define NET_SKB_PAD L1_CACHE_BYTES
#endif
#define arch_align_stack(x) (x)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 950dc55e5192..40ccf8cc4239 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
#define HAVE_NETIF_QUEUE
-static inline void __netif_schedule(struct net_device *dev)
-{
- if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
- unsigned long flags;
- struct softnet_data *sd;
-
- local_irq_save(flags);
- sd = &__get_cpu_var(softnet_data);
- dev->next_sched = sd->output_queue;
- sd->output_queue = dev;
- raise_softirq_irqoff(NET_TX_SOFTIRQ);
- local_irq_restore(flags);
- }
-}
+extern void __netif_schedule(struct net_device *dev);
static inline void netif_schedule(struct net_device *dev)
{
@@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb)
/* Use this variant in places where it could be invoked
* either from interrupt or non-interrupt context.
*/
-static inline void dev_kfree_skb_any(struct sk_buff *skb)
-{
- if (in_irq() || irqs_disabled())
- dev_kfree_skb_irq(skb);
- else
- dev_kfree_skb(skb);
-}
+extern void dev_kfree_skb_any(struct sk_buff *skb);
#define HAVE_NETIF_RX 1
extern int netif_rx(struct sk_buff *skb);
@@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev)
return test_bit(__LINK_STATE_PRESENT, &dev->state);
}
-static inline void netif_device_detach(struct net_device *dev)
-{
- if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
- netif_running(dev)) {
- netif_stop_queue(dev);
- }
-}
+extern void netif_device_detach(struct net_device *dev);
-static inline void netif_device_attach(struct net_device *dev)
-{
- if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
- netif_running(dev)) {
- netif_wake_queue(dev);
- __netdev_watchdog_up(dev);
- }
-}
+extern void netif_device_attach(struct net_device *dev);
/*
* Network interface message level settings
@@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev)
* already been called and returned 1.
*/
-static inline void __netif_rx_schedule(struct net_device *dev)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- dev_hold(dev);
- list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
-}
+extern void __netif_rx_schedule(struct net_device *dev);
/* Try to reschedule poll. Called by irq handler. */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 613b9513f8b9..c4619a428d9b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
#define NET_IP_ALIGN 2
#endif
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom, you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+
extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
@@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
gfp_t gfp_mask)
{
- struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+ struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
if (likely(skb))
- skb_reserve(skb, 16);
+ skb_reserve(skb, NET_SKB_PAD);
return skb;
}
#else
@@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length)
*/
static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
{
- int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+ int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
+ skb_headroom(skb);
if (delta < 0)
delta = 0;
if (delta || skb_cloned(skb))
- return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
+ ~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
return 0;
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9418f4d1afbb..3c989db8a7aa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -405,9 +405,6 @@ extern int tcp_disconnect(struct sock *sk, int flags);
extern void tcp_unhash(struct sock *sk);
-extern int tcp_v4_hash_connecting(struct sock *sk);
-
-
/* From syncookies.c */
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
diff --git a/net/core/dev.c b/net/core/dev.c
index a3ab11f34153..434220d093aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1080,6 +1080,70 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
+
+void __netif_schedule(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+ unsigned long flags;
+ struct softnet_data *sd;
+
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ dev->next_sched = sd->output_queue;
+ sd->output_queue = dev;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ dev_hold(dev);
+ list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ if (dev->quota < 0)
+ dev->quota += dev->weight;
+ else
+ dev->quota = dev->weight;
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+ if (in_irq() || irqs_disabled())
+ dev_kfree_skb_irq(skb);
+ else
+ dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_stop_queue(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_wake_queue(dev);
+ __netdev_watchdog_up(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+
/*
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
diff --git a/net/core/sock.c b/net/core/sock.c
index a96ea7dd0fc1..ed2afdb9ea2d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -385,7 +385,21 @@ set_sndbuf:
val = sysctl_rmem_max;
set_rcvbuf:
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /* FIXME: is this lower bound the right one? */
+ /*
+ * We double it on the way in to account for
+ * "struct sk_buff" etc. overhead. Applications
+ * assume that the SO_RCVBUF setting they make will
+ * allow that much actual data to be received on that
+ * socket.
+ *
+ * Applications are unaware that "struct sk_buff" and
+ * other overheads allocate from the receive buffer
+ * during socket buffer allocation.
+ *
+ * And after considering the possible alternatives,
+ * returning the value we actually used in getsockopt
+ * is the most desirable behavior.
+ */
if ((val * 2) < SOCK_MIN_RCVBUF)
sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
else
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index e3dd30d36c8a..b39e2a597889 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -204,7 +204,7 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
if (rc) {
kfree(opt->dccpop_sc->dccpoc_val);
kfree(opt->dccpop_sc);
- opt->dccpop_sc = 0;
+ opt->dccpop_sc = NULL;
return rc;
}
@@ -322,7 +322,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
DCCPO_CONFIRM_L;
opt->dccpop_feat = feature;
- opt->dccpop_val = 0;
+ opt->dccpop_val = NULL;
opt->dccpop_len = 0;
/* change feature */
@@ -523,7 +523,7 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
* once...
*/
/* the master socket no longer needs to worry about confirms */
- opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */
+ opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
/* reset state for a new socket */
opt->dccpop_conf = 0;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d2ae9893ca17..a26ff9f44576 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -620,7 +620,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
}
write_unlock(&dndev_lock);
if (old)
- dev_put(dev);
+ dev_put(old);
return rv;
}