From 21d9e30ed020d24336cc3bee2a4e04da232ed554 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 13 Sep 2005 01:32:25 +0200 Subject: [Bluetooth] Add support for extended inquiry responses This patch adds the handling of the extended inquiry responses and inserts them into the inquiry cache. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d6da0939216d..b61b4e8e36fd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -558,6 +558,35 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +/* Extended Inquiry Result */ +static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct inquiry_data data; + struct extended_inquiry_info *info = (struct extended_inquiry_info *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); + + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + + if (!num_rsp) + return; + + hci_dev_lock(hdev); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + + hci_dev_unlock(hdev); +} + /* Connect Request */ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { @@ -940,6 +969,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_inquiry_result_with_rssi_evt(hdev, skb); break; + case HCI_EV_EXTENDED_INQUIRY_RESULT: + hci_extended_inquiry_result_evt(hdev, skb); + break; + case HCI_EV_CONN_REQUEST: hci_conn_request_evt(hdev, skb); break; -- cgit v1.2.3 From 354d28d5f8546e115ebaae9311897f0bc4b6a8d4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 13 Sep 2005 01:32:31 +0200 Subject: [Bluetooth] Prevent RFCOMM connections through the RAW socket This patch adds additional checks to prevent RFCOMM connections be established through the RAW socket interface. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/sock.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 90e19eb6d3cc..f49e7e938bfb 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -363,6 +363,11 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + write_lock_bh(&rfcomm_sk_list.lock); if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) { @@ -393,13 +398,17 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_rc)) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; + lock_sock(sk); - if (sk->sk_type != SOCK_STREAM) - return -EINVAL; + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } - lock_sock(sk); + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } sk->sk_state = BT_CONNECT; bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); @@ -410,6 +419,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); +done: release_sock(sk); return err; } @@ -428,6 +438,11 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + if (!rfcomm_pi(sk)->channel) { bdaddr_t *src = &bt_sk(sk)->src; u8 channel; @@ -472,6 +487,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f goto done; } + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + goto done; + } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); -- cgit v1.2.3 From 939bb7ef901b2537aa5b4cd819f9c1b25c6a5710 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 13 Sep 2005 01:25:39 -0700 Subject: [PATCH] Code cleanups in calbacks in svcsock Change a printk(KERN_WARNING to dprintk, and it is really only interesting when trying to debug a problem, and can occur normally without error. Remove various gratuitous gotos in surrounding code, and remove some type-cast assignments from inside 'if' conditionals, as that is just obscuring what it going on. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/svcsock.c | 83 ++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 51885b5f744e..30ec3efc48a6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -512,15 +512,14 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) static void svc_udp_data_ready(struct sock *sk, int count) { - struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - if (!svsk) - goto out; - dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", - svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); - set_bit(SK_DATA, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + if (svsk) { + dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", + svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); + set_bit(SK_DATA, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); } @@ -540,7 +539,7 @@ svc_write_space(struct sock *sk) } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { - printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n", + dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); wake_up_interruptible(sk->sk_sleep); } @@ -692,31 +691,29 @@ svc_udp_init(struct svc_sock *svsk) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { - struct svc_sock *svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); + sk, sk->sk_state); - if (sk->sk_state != TCP_LISTEN) { - /* - * This callback may called twice when a new connection - * is established as a child socket inherits everything - * from a parent LISTEN socket. - * 1) data_ready method of the parent socket will be called - * when one of child sockets become ESTABLISHED. - * 2) data_ready method of the child socket may be called - * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. - */ - goto out; - } - if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { - printk("svc: socket %p: no user data\n", sk); - goto out; + /* + * This callback may called twice when a new connection + * is established as a child socket inherits everything + * from a parent LISTEN socket. + * 1) data_ready method of the parent socket will be called + * when one of child sockets become ESTABLISHED. + * 2) data_ready method of the child socket may be called + * when it receives data before the socket is accepted. + * In case of 2, we should ignore it silently. + */ + if (sk->sk_state == TCP_LISTEN) { + if (svsk) { + set_bit(SK_CONN, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } else + printk("svc: socket %p: no user data\n", sk); } - set_bit(SK_CONN, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); } @@ -727,18 +724,17 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { - struct svc_sock *svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); + sk, sk->sk_state, sk->sk_user_data); - if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { + if (!svsk) printk("svc: socket %p: no user data\n", sk); - goto out; + else { + set_bit(SK_CLOSE, &svsk->sk_flags); + svc_sock_enqueue(svsk); } - set_bit(SK_CLOSE, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); } @@ -746,15 +742,14 @@ svc_tcp_state_change(struct sock *sk) static void svc_tcp_data_ready(struct sock *sk, int count) { - struct svc_sock * svsk; + struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; dprintk("svc: socket %p TCP data ready (svsk %p)\n", - sk, sk->sk_user_data); - if (!(svsk = (struct svc_sock *)(sk->sk_user_data))) - goto out; - set_bit(SK_DATA, &svsk->sk_flags); - svc_sock_enqueue(svsk); - out: + sk, sk->sk_user_data); + if (svsk) { + set_bit(SK_DATA, &svsk->sk_flags); + svc_sock_enqueue(svsk); + } if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); } -- cgit v1.2.3 From 5cb30640ce01d76d256533bb2824c9cc14eb4070 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Sep 2005 13:48:00 -0700 Subject: [NETFILTER]: Use correct type for "ports" module parameter With large port numbers the helper_names buffer can overflow. Noticed by Samir Bellabes Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_ftp.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_irc.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_tftp.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 1b79ec36085f..d77d6b3f5f80 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -29,9 +29,9 @@ static char *ftp_buffer; static DEFINE_SPINLOCK(ip_ftp_lock); #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); static int loose; module_param(loose, int, 0600); @@ -450,7 +450,7 @@ out_update_nl: } static struct ip_conntrack_helper ftp[MAX_PORTS]; -static char ftp_names[MAX_PORTS][10]; +static char ftp_names[MAX_PORTS][sizeof("ftp-65535")]; /* Not __exit: called from init() */ static void fini(void) diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index d7a8a98c05e1..15457415a4f3 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -34,7 +34,7 @@ #include #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; static int max_dcc_channels = 8; static unsigned int dcc_timeout = 300; @@ -52,7 +52,7 @@ EXPORT_SYMBOL_GPL(ip_nat_irc_hook); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); module_param(max_dcc_channels, int, 0400); MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); @@ -240,7 +240,7 @@ static int help(struct sk_buff **pskb, } static struct ip_conntrack_helper irc_helpers[MAX_PORTS]; -static char irc_names[MAX_PORTS][10]; +static char irc_names[MAX_PORTS][sizeof("irc-65535")]; static void fini(void); diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index d2b590533452..a78736b8525d 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -26,9 +26,9 @@ MODULE_DESCRIPTION("tftp connection tracking helper"); MODULE_LICENSE("GPL"); #define MAX_PORTS 8 -static int ports[MAX_PORTS]; +static short ports[MAX_PORTS]; static int ports_c; -module_param_array(ports, int, &ports_c, 0400); +module_param_array(ports, short, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of tftp servers"); #if 0 @@ -100,7 +100,7 @@ static int tftp_help(struct sk_buff **pskb, } static struct ip_conntrack_helper tftp[MAX_PORTS]; -static char tftp_names[MAX_PORTS][10]; +static char tftp_names[MAX_PORTS][sizeof("tftp-65535")]; static void fini(void) { -- cgit v1.2.3 From e7fa1bd93f977c03050bd6b3d13846aa43310fef Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Sep 2005 13:48:34 -0700 Subject: [NETFILTER]: Simplify netbios helper Don't parse the packet, the data is already available in the conntrack structure. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index bb7246683b74..71ef19d126d0 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -31,6 +30,8 @@ #include #include +#define NMBD_PORT 137 + MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); @@ -44,7 +45,6 @@ static int help(struct sk_buff **pskb, { struct ip_conntrack_expect *exp; struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr _uh, *uh; struct rtable *rt = (struct rtable *)(*pskb)->dst; struct in_device *in_dev; u_int32_t mask = 0; @@ -72,20 +72,15 @@ static int help(struct sk_buff **pskb, if (mask == 0) goto out; - uh = skb_header_pointer(*pskb, iph->ihl * 4, sizeof(_uh), &_uh); - BUG_ON(uh == NULL); - exp = ip_conntrack_expect_alloc(ct); if (exp == NULL) goto out; - memset(&exp->tuple, 0, sizeof(exp->tuple)); - exp->tuple.src.ip = iph->daddr & mask; - exp->tuple.dst.ip = iph->saddr; - exp->tuple.dst.u.udp.port = uh->source; - exp->tuple.dst.protonum = IPPROTO_UDP; - memset(&exp->mask, 0, sizeof(exp->mask)); + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); + exp->mask.src.ip = mask; + exp->mask.src.u.udp.port = 0xFFFF; exp->mask.dst.ip = 0xFFFFFFFF; exp->mask.dst.u.udp.port = 0xFFFF; exp->mask.dst.protonum = 0xFF; @@ -107,7 +102,7 @@ static struct ip_conntrack_helper helper = { .src = { .u = { .udp = { - .port = __constant_htons(137), + .port = __constant_htons(NMBD_PORT), } } }, -- cgit v1.2.3 From cd0bf2d796ebb51c346b1ed4208cdbfd86e98a61 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Sep 2005 13:48:58 -0700 Subject: [NETFILTER]: Fix rcu race in ipt_REDIRECT Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_REDIRECT.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index d2e13447678e..715cb613405c 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -88,14 +88,18 @@ redirect_target(struct sk_buff **pskb, newdst = htonl(0x7F000001); else { struct in_device *indev; + struct in_ifaddr *ifa; - /* Device might not have an associated in_device. */ - indev = (struct in_device *)(*pskb)->dev->ip_ptr; - if (indev == NULL || indev->ifa_list == NULL) - return NF_DROP; + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get((*pskb)->dev); + if (indev && (ifa = indev->ifa_list)) + newdst = ifa->ifa_local; + rcu_read_unlock(); - /* Grab first address on interface. */ - newdst = indev->ifa_list->ifa_local; + if (!newdst) + return NF_DROP; } /* Transfer from original range. */ -- cgit v1.2.3 From adcb5ad1e5ba3996d53a047c5486efa6e734b413 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Sep 2005 13:49:15 -0700 Subject: [NETFILTER]: Fix DHCP + MASQUERADE problem In 2.6.13-rcX the MASQUERADE target was changed not to exclude local packets for better source address consistency. This breaks DHCP clients using UDP sockets when the DHCP requests are caught by a MASQUERADE rule because the MASQUERADE target drops packets when no address is configured on the outgoing interface. This patch makes it ignore packets with a source address of 0. Thanks to Rusty for this suggestion. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_MASQUERADE.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 2f3e181c8e97..275a174c6fe6 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -90,6 +90,12 @@ masquerade_target(struct sk_buff **pskb, IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + /* Source address is 0.0.0.0 - locally generated packet that is + * probably not supposed to be masqueraded. + */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) + return NF_ACCEPT; + mr = targinfo; rt = (struct rtable *)(*pskb)->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); -- cgit v1.2.3 From 811265b8e8bf84e3b91507bf6a3a8eea20e7a4b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Sep 2005 19:03:15 -0300 Subject: [DCCP]: Check if already in the CLOSING state in dccp_rcv_closereq It is possible to receive more than one CLOSEREQ packet if the CLOSE packet sent in response is somehow lost, change the state to DCCP_CLOSING only on the first CLOSEREQ packet received. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index c60bc3433f5e..f01d588299c8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -50,7 +50,8 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return; } - dccp_set_state(sk, DCCP_CLOSING); + if (sk->sk_state != DCCP_CLOSING) + dccp_set_state(sk, DCCP_CLOSING); dccp_send_close(sk, 0); } -- cgit v1.2.3 From 2b80230a7f8cd346c1e2ebafdd02be432bf10459 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Sep 2005 19:05:08 -0300 Subject: [DCCP]: Handle SYNC packets in dccp_rcv_state_process Eliciting a SYNCACK in response, we were handling SYNC packets only in the DCCP_OPEN state, in dccp_rcv_established. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/input.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index f01d588299c8..c74034cf7ede 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -562,6 +562,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } + if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNCACK); + goto discard; + } + switch (sk->sk_state) { case DCCP_CLOSED: return 1; -- cgit v1.2.3 From 3c05d92ed49f644d1f5a960fa48637d63b946016 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Sep 2005 20:50:35 -0700 Subject: [TCP]: Compute in_sacked properly when we split up a TSO frame. The problem is that the SACK fragmenting code may incorrectly call tcp_fragment() with a length larger than the skb->len. This happens when the skb on the transmit queue completely falls to the LHS of the SACK. And add a BUG() check to tcp_fragment() so we can spot this kind of error more quickly in the future. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 +++++++++------- net/ipv4/tcp_output.c | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 29222b964951..a7537c7bbd06 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -979,14 +979,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + pcount = tcp_skb_pcount(skb); - if (pcount > 1 && - (after(start_seq, TCP_SKB_CB(skb)->seq) || - before(end_seq, TCP_SKB_CB(skb)->end_seq))) { + if (pcount > 1 && !in_sack && + after(TCP_SKB_CB(skb)->end_seq, start_seq)) { unsigned int pkt_len; - if (after(start_seq, TCP_SKB_CB(skb)->seq)) + in_sack = !after(start_seq, + TCP_SKB_CB(skb)->seq); + + if (!in_sack) pkt_len = (start_seq - TCP_SKB_CB(skb)->seq); else @@ -999,9 +1004,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ fack_count += pcount; - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && - !before(end_seq, TCP_SKB_CB(skb)->end_seq); - sacked = TCP_SKB_CB(skb)->sacked; /* Account D-SACK for retransmitted packet. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c10e4435e3b1..b018e31b6530 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -435,6 +435,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss int nsize, old_factor; u16 flags; + BUG_ON(len >= skb->len); + nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; -- cgit v1.2.3 From de9daad90ecb54f3c37c3f8967d581e20d927539 Mon Sep 17 00:00:00 2001 From: Denis Lukianov Date: Wed, 14 Sep 2005 20:53:42 -0700 Subject: [MCAST]: Fix MCAST_EXCLUDE line dupes This patch fixes line dupes at /ipv4/igmp.c and /ipv6/mcast.c in the 2.6 kernel, where MCAST_EXCLUDE is mistakenly used instead of MCAST_INCLUDE. Signed-off-by: Denis Lukianov Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- net/ipv6/mcast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44607f4767b8..70c44e4c3ceb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) } pmc->sources = NULL; pmc->sfmode = MCAST_EXCLUDE; - pmc->sfcount[MCAST_EXCLUDE] = 0; + pmc->sfcount[MCAST_INCLUDE] = 0; pmc->sfcount[MCAST_EXCLUDE] = 1; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 29fed6e58d0a..519899fb11d5 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) } pmc->mca_sources = NULL; pmc->mca_sfmode = MCAST_EXCLUDE; - pmc->mca_sfcount[MCAST_EXCLUDE] = 0; + pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } -- cgit v1.2.3 From 1c011bed5f49ce9e6193b18b226106a41ecfa95c Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Wed, 14 Sep 2005 20:55:16 -0700 Subject: [BRIDGE-NF]: Fix iptables redirect on bridge interface Here's a slightly altered patch, originally from Mark Glines who diagnosed and fixed the problem. Signed-off-by: Bart De Schuymer Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2d52fee63a8c..d8e36b775125 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) .tos = RT_TOS(iph->tos)} }, .proto = 0}; if (!ip_route_output_key(&rt, &fl)) { - /* Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. */ - if (((struct dst_entry *)rt)->dev == dev) { + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. + * - Deal with redirected traffic. */ + if (((struct dst_entry *)rt)->dev == dev || + rt->rt_type == RTN_LOCAL) { skb->dst = (struct dst_entry *)rt; goto bridged_dnat; } -- cgit v1.2.3 From f5e229db9cdb27f83594712ca4bb98d9377eb6ed Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 14 Sep 2005 21:04:23 -0700 Subject: [IPVS]: Really invalidate persistent templates Agostino di Salle noticed that persistent templates are not invalidated due to buggy optimization. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index e11952ea17af..5994521fddc5 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -467,7 +467,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->cport) { + if (ct->vport != 65535) { if (ip_vs_conn_unhash(ct)) { ct->dport = 65535; ct->vport = 65535; -- cgit v1.2.3 From 87375ab47cd0ba04124c6d3fd80db5c368f5dcb6 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 14 Sep 2005 21:08:51 -0700 Subject: [IPVS]: ip_vs_ftp breaks connections using persistence ip_vs_ftp when loaded can create NAT connections with unknown client port for passive FTP. For such expectations we lookup with cport=0 on incoming packet but it matches the format of the persistence templates causing packets to other persistent virtual servers to be forwarded to real server without creating connection. Later the reply packets are treated as foreign and not SNAT-ed. This patch changes the connection lookup for packets from clients: * introduce IP_VS_CONN_F_TEMPLATE connection flag to mark the connection as template * create new connection lookup function just for templates - ip_vs_ct_in_get * make sure ip_vs_conn_in_get hits only connections with IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. By this way we avoid returning template when looking for cport=0 (ftp) Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +++ net/ipv4/ipvs/ip_vs_conn.c | 41 ++++++++++++++++++++++++++++++++++++++--- net/ipv4/ipvs/ip_vs_core.c | 16 ++++++++-------- net/ipv4/ipvs/ip_vs_sync.c | 20 ++++++++++++++------ 4 files changed, 63 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e426641c519f..06b4235aa016 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -84,6 +84,7 @@ #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ +#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ /* Move it to better place one day, for now keep it unique */ #define NFC_IPVS_PROPERTY 0x10000 @@ -739,6 +740,8 @@ enum { extern struct ip_vs_conn *ip_vs_conn_in_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +extern struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); extern struct ip_vs_conn *ip_vs_conn_out_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 5994521fddc5..f828fa2eb7de 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +/* Get reference to connection template */ +struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_vs_conn *cp; + + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + + ct_read_lock(hash); + + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (s_addr==cp->caddr && s_port==cp->cport && + d_port==cp->vport && d_addr==cp->vaddr && + cp->flags & IP_VS_CONN_F_TEMPLATE && + protocol==cp->protocol) { + /* HIT */ + atomic_inc(&cp->refcnt); + goto out; + } + } + cp = NULL; + + out: + ct_read_unlock(hash); + + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + ip_vs_proto_name(protocol), + NIPQUAD(s_addr), ntohs(s_port), + NIPQUAD(d_addr), ntohs(d_port), + cp?"hit":"not hit"); + + return cp; +} /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV state (inactive) or other protocol inacive state */ @@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so decrease the inactconns or activeconns counter */ if (cp->flags & IP_VS_CONN_F_INACTIVE) { @@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) ct_write_lock_bh(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) + if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3ac7eeca04ac..981cc3244ef2 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, iph->daddr, ports[1], dest->addr, dest->port, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * port zero template: */ if (svc->fwmark) - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 574d1f509b46..2e5ced3d8062 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; inr_conns; i++) { + unsigned flags; + s = (struct ip_vs_sync_conn *)p; - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + flags = ntohs(s->flags); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); + else + cp = ip_vs_ct_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); if (!cp) { cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - ntohs(s->flags), NULL); + flags, NULL); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; @@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } else if (!cp->dest) { /* it is an entry created by the synchronization */ cp->state = ntohs(s->state); - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; + cp->flags = flags | IP_VS_CONN_F_HASHED; } /* Note that we don't touch its state and flags if it is a normal entry. */ - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { + if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); p += FULL_CONN_SIZE; -- cgit v1.2.3 From 40796c5e8f2a93008e9034b3110a7e7b1fa0fba0 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Wed, 14 Sep 2005 21:10:20 -0700 Subject: [IPV6]: Fix per-socket multicast filtering in sk_reuse case per-socket multicast filters were not being applied to all sockets in the case of an exact-match bound address, due to an over-exuberant "return" in the look-up code. Fix below. IPv4 does not have this problem. Thanks to Hoerdt Mickael for reporting the bug. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv6/udp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 69b146843a20..2b9bf9bd177f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -405,9 +405,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, continue; if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) - return s; - continue; + if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + continue; } if(!inet6_mc_check(s, loc_addr, rmt_addr)) continue; -- cgit v1.2.3 From 37f7f421cce13435fdc0d870caf51141e5ebf079 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Sep 2005 16:51:01 -0700 Subject: [NET]: Do not leak MSG_CMSG_COMPAT into userspace. Noticed by Sridhar Samudrala. Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index c699e93c33d7..f9264472377f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err < 0) goto out_freeiov; } - err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); + err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) -- cgit v1.2.3 From 0c10c5d96865ce611d6a780888eff0ef4fab358b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Sep 2005 16:58:33 -0700 Subject: [DCCP]: More precisely set reset_code when sending RESET packets Moving the setting of DCCP_SKB_CB(skb)->dccpd_reset_code to the places where events happen that trigger sending a RESET packet. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 22 +++++++++++----------- net/dccp/ipv4.c | 10 +++++++--- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index c74034cf7ede..062e9f8359d0 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } out_invalid_packet: - return 1; /* dccp_v4_do_rcv will send a reset, but... - FIXME: the reset code should be - DCCP_RESET_CODE_PACKET_ERROR */ + /* dccp_v4_do_rcv will send a reset */ + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; @@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dh->dccph_type == DCCP_PKT_RESET) goto discard; - /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + /* Caller (dccp_v4_do_rcv) will send Reset */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } @@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ) + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); @@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (dp->dccps_options.dccpo_send_ack_vector) { if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, + dcb->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; /* @@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNC); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); @@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNCACK); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); goto discard; } switch (sk->sk_state) { case DCCP_CLOSED: + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2afaa464e7f0..e09907d8b7da 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -669,12 +669,16 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; goto drop; + } /* * TW buckets are converted to open requests without @@ -718,7 +722,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_isr = dcb->dccpd_seq; dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; @@ -735,6 +739,7 @@ drop_and_free: __reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; return -1; } @@ -1005,7 +1010,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); discard: kfree_skb(skb); -- cgit v1.2.3 From 67e6b629212fa9ffb7420e8a88a41806af637e28 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Sep 2005 16:58:40 -0700 Subject: [DCCP]: Introduce DCCP_SOCKOPT_SERVICE As discussed in the dccp@vger mailing list: Now applications have to use setsockopt(DCCP_SOCKOPT_SERVICE, service[s]), prior to calling listen() and connect(). An array of unsigned ints can be passed meaning that the listening sock accepts connection requests for several services. With this we can ditch struct sockaddr_dccp and use only sockaddr_in (and sockaddr_in6 in the future). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 40 ++++++++++++++++++------- net/dccp/dccp.h | 9 +++--- net/dccp/ipv4.c | 30 +++++++++++++++++-- net/dccp/minisocks.c | 6 ++-- net/dccp/output.c | 14 ++++----- net/dccp/proto.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 154 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 8bf4bacb5051..0e72708677e4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,16 +4,6 @@ #include #include -/* Structure describing an Internet (DCCP) socket address. */ -struct sockaddr_dccp { - __u16 sdccp_family; /* Address family */ - __u16 sdccp_port; /* Port number */ - __u32 sdccp_addr; /* Internet address */ - __u32 sdccp_service; /* Service */ - /* Pad to size of `struct sockaddr': 16 bytes . */ - __u32 sdccp_pad; -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -188,6 +178,9 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 +#define DCCP_SOCKOPT_SERVICE 2 + +#define DCCP_SERVICE_LIST_MAX_LEN 32 #ifdef __KERNEL__ @@ -382,6 +375,25 @@ enum dccp_role { DCCP_ROLE_SERVER, }; +struct dccp_service_list { + __u32 dccpsl_nr; + __u32 dccpsl_list[0]; +}; + +#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) + +static inline int dccp_list_has_service(const struct dccp_service_list *sl, + const u32 service) +{ + if (likely(sl != NULL)) { + u32 i = sl->dccpsl_nr; + while (i--) + if (sl->dccpsl_list[i] == service) + return 1; + } + return 0; +} + /** * struct dccp_sock - DCCP socket state * @@ -417,7 +429,8 @@ struct dccp_sock { __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; - unsigned long dccps_service; + __u32 dccps_service; + struct dccp_service_list *dccps_service_list; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_packet_size; @@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline int dccp_service_not_initialized(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630b3b18..be7a660b6b24 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type; - __u8 dccpd_reset_code; - __u8 dccpd_service; - __u8 dccpd_ccval; + __u8 dccpd_type:4; + __u8 dccpd_ccval:4; + __u8 dccpd_reset_code; + __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; - int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e09907d8b7da..94a440b2685b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; + if (dccp_service_not_initialized(sk)) + return -EPROTO; + if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} + int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -669,6 +682,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; @@ -680,6 +694,10 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -722,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -1284,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; + dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1305,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + if (dp->dccps_service_list != NULL) { + kfree(dp->dccps_service_list); + dp->dccps_service_list = NULL; + } + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc04cbe..933e10db1789 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); + newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackpkts = NULL; - newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_service_list = NULL; + newdp->dccps_service = dreq->dreq_service; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e91e511..156b1d29a156 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dcb->dccpd_service; + dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; + struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); skb->csum = 0; + dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - /* FIXME: set service to something meaningful, coming - * from userspace*/ - DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc7..9bda2868eba6 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* + * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) + * before calling listen() + */ + if (dccp_service_not_initialized(sk)) + return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static int dccp_setsockopt_service(struct sock *sk, const u32 service, + char __user *optval, int optlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_service_list *sl = NULL; + + if (service == DCCP_SERVICE_INVALID_VALUE || + optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) + return -EINVAL; + + if (optlen > sizeof(service)) { + sl = kmalloc(optlen, GFP_KERNEL); + if (sl == NULL) + return -ENOMEM; + + sl->dccpsl_nr = optlen / sizeof(u32) - 1; + if (copy_from_user(sl->dccpsl_list, + optval + sizeof(service), + optlen - sizeof(service)) || + dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { + kfree(sl); + return -EFAULT; + } + } + + lock_sock(sk); + dp->dccps_service = service; + + if (dp->dccps_service_list != NULL) + kfree(dp->dccps_service_list); + + dp->dccps_service_list = sl; + release_sock(sk); + return 0; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - lock_sock(sk); + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_setsockopt_service(sk, val, optval, optlen); + lock_sock(sk); dp = dccp_sk(sk); err = 0; @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +static int dccp_getsockopt_service(struct sock *sk, int len, + u32 __user *optval, + int __user *optlen) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_service_list *sl; + int err = -ENOENT, slen = 0, total_len = sizeof(u32); + + lock_sock(sk); + if (dccp_service_not_initialized(sk)) + goto out; + + if ((sl = dp->dccps_service_list) != NULL) { + slen = sl->dccpsl_nr * sizeof(u32); + total_len += slen; + } + + err = -EINVAL; + if (total_len > len) + goto out; + + err = 0; + if (put_user(total_len, optlen) || + put_user(dp->dccps_service, optval) || + (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) + err = -EFAULT; +out: + release_sock(sk); + return err; +} + int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; -- cgit v1.2.3 From 4451362445b2d83886003f1d739b94e4f000eeeb Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 16 Sep 2005 16:59:46 -0700 Subject: [NETFILTER] CLUSTERIP: introduce reference counting for entries The CLUSTERIP target creates a procfs entry for all different cluster IPs. Although more than one rules can refer to a single cluster IP (and thus a single config structure), removal of the procfs entry is done unconditionally in destroy(). In more complicated situations involving deferred dereferencing of the config structure by procfs and creating a new rule with the same cluster IP it's also possible that no entry will be created for the new rule. This patch fixes the problem by counting the number of entries referencing a given config structure and moving the config list manipulation and procfs entry deletion parts to the clusterip_config_entry_put() function. Signed-off-by: KOVACS Krisztian Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 80 +++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d38913754b1..adbf4d752d0f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -49,6 +49,8 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ + atomic_t entries; /* number of entries/rules + * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ @@ -76,23 +78,48 @@ static struct proc_dir_entry *clusterip_procdir; #endif static inline void -clusterip_config_get(struct clusterip_config *c) { +clusterip_config_get(struct clusterip_config *c) +{ atomic_inc(&c->refcount); } static inline void -clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) { +clusterip_config_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->refcount)) + kfree(c); +} + +/* increase the count of entries(rules) using/referencing this config */ +static inline void +clusterip_config_entry_get(struct clusterip_config *c) +{ + atomic_inc(&c->entries); +} + +/* decrease the count of entries using/referencing this config. If last + * entry(rule) is removed, remove the config from lists, but don't free it + * yet, since proc-files could still be holding references */ +static inline void +clusterip_config_entry_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->entries)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); + dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); - kfree(c); + + /* In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own, + * so it's safe to remove the entry even if it's in use. */ +#ifdef CONFIG_PROC_FS + remove_proc_entry(c->pde->name, c->pde->parent); +#endif } } - static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { @@ -111,7 +138,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip) +clusterip_config_find_get(u_int32_t clusterip, int entry) { struct clusterip_config *c; @@ -122,6 +149,8 @@ clusterip_config_find_get(u_int32_t clusterip) return NULL; } atomic_inc(&c->refcount); + if (entry) + atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c; @@ -148,6 +177,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); + atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS /* create proc dir entry */ @@ -415,8 +445,26 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr); - if (!config) { + config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + if (config) { + if (cipinfo->config != NULL) { + /* Case A: This is an entry that gets reloaded, since + * it still has a cipinfo->config pointer. Simply + * increase the entry refcount and return */ + if (cipinfo->config != config) { + printk(KERN_ERR "CLUSTERIP: Reloaded entry " + "has invalid config pointer!\n"); + return 0; + } + clusterip_config_entry_get(cipinfo->config); + } else { + /* Case B: This is a new rule referring to an existing + * clusterip config. */ + cipinfo->config = config; + clusterip_config_entry_get(cipinfo->config); + } + } else { + /* Case C: This is a completely new clusterip config */ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return 0; @@ -443,10 +491,9 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } + cipinfo->config = config; } - cipinfo->config = config; - return 1; } @@ -455,13 +502,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) { struct ipt_clusterip_tgt_info *cipinfo = matchinfo; - /* we first remove the proc entry and then drop the reference - * count. In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own */ -#ifdef CONFIG_PROC_FS - remove_proc_entry(cipinfo->config->pde->name, - cipinfo->config->pde->parent); -#endif + /* if no more entries are referencing the config, remove it + * from the list and destroy the proc entry */ + clusterip_config_entry_put(cipinfo->config); + clusterip_config_put(cipinfo->config); } @@ -533,7 +577,7 @@ arp_mangle(unsigned int hook, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip); + c = clusterip_config_find_get(payload->src_ip, 0); if (!c) return NF_ACCEPT; -- cgit v1.2.3 From 136e92bbec0a6d4c2dd1e5b5ac869ab5470547a4 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 16 Sep 2005 17:00:04 -0700 Subject: [NETFILTER] CLUSTERIP: use a bitmap to store node responsibility data Instead of maintaining an array containing a list of nodes this instance is responsible for let's use a simple bitmap. This provides the following features: * clusterip_responsible() and the add_node()/delete_node() operations become very simple and don't need locking * the config structure is much smaller In spite of the completely different internal data representation the user-space interface remains almost unchanged; the only difference is that the proc file does not list nodes in the order they were added. (The target info structure remains the same.) Signed-off-by: KOVACS Krisztian Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 143 ++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 82 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index adbf4d752d0f..9bcb398fbc1f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,7 @@ #include #include -#define CLUSTERIP_VERSION "0.7" +#define CLUSTERIP_VERSION "0.8" #define DEBUG_CLUSTERIP @@ -56,8 +57,7 @@ struct clusterip_config { u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - u_int16_t num_local_nodes; /* number of local nodes */ - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ + unsigned long local_nodes; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -68,8 +68,7 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list _AND_ the configurable - * data within all structurses (num_local_nodes, local_nodes[]) */ +/* clusterip_lock protects the clusterip_configs list */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -156,6 +155,17 @@ clusterip_config_find_get(u_int32_t clusterip, int entry) return c; } +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) { + set_bit(i->local_nodes[n] - 1, &c->local_nodes); + } +} + static struct clusterip_config * clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev) @@ -172,8 +182,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - c->num_local_nodes = i->num_local_nodes; - memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); + clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); @@ -201,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - if (c->num_local_nodes >= CLUSTERIP_MAX_NODES - || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - - /* check if we alrady have this number in our array */ - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - write_unlock_bh(&clusterip_lock); - return 1; - } - } - c->local_nodes[c->num_local_nodes++] = nodenum; + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; - write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - - if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); - memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); - c->num_local_nodes--; - write_unlock_bh(&clusterip_lock); - return 0; - } - } + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return 0; - write_unlock_bh(&clusterip_lock); return 1; } @@ -315,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - int i; - - read_lock_bh(&clusterip_lock); - - if (config->num_local_nodes == 0) { - read_unlock_bh(&clusterip_lock); - return 0; - } - - for (i = 0; i < config->num_local_nodes; i++) { - if (config->local_nodes[i] == hash) { - read_unlock_bh(&clusterip_lock); - return 1; - } - } - - read_unlock_bh(&clusterip_lock); - - return 0; + return test_bit(hash - 1, &config->local_nodes); } /*********************************************************************** @@ -618,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int *nodeidx; - - read_lock_bh(&clusterip_lock); - if (*pos >= c->num_local_nodes) + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) return NULL; - nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); - if (!nodeidx) + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) return ERR_PTR(-ENOMEM); - *nodeidx = *pos; - return nodeidx; + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - *pos = ++(*nodeidx); - if (*pos >= c->num_local_nodes) { + *pos = ++idx->pos; + if (*pos >= idx->weight) { kfree(v); return NULL; } - return nodeidx; + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); - - read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - if (*nodeidx != 0) + if (idx->pos != 0) seq_putc(s, ','); - seq_printf(s, "%u", c->local_nodes[*nodeidx]); - if (*nodeidx == c->num_local_nodes-1) + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) seq_putc(s, '\n'); return 0; -- cgit v1.2.3 From a8f39143ac67ffa2e26ce48aaac6bf5dc7dae95f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 16 Sep 2005 17:00:38 -0700 Subject: [NETFILTER]: Fix oops in conntrack event cache ip_ct_refresh_acct() can be called without a valid "skb" pointer. This used to work, since ct_add_counters() deals with that fact. However, the recently-added event cache doesn't handle this at all. This patch is a quick fix that is supposed to be replaced soon by a cleaner solution during the pending redesign of the event cache. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19cba16e6e1e..f8cd8e42961e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - ip_conntrack_event_cache(IPCT_REFRESH, skb); + /* FIXME: We loose some REFRESH events if this function + * is called without an skb. I'll fix this later -HW */ + if (skb) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); -- cgit v1.2.3 From 777ed97f3e549832845d70dcae96cb36c41a543a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 17 Sep 2005 00:41:02 -0700 Subject: [NETFILTER] Fix Kconfig dependencies for nfnetlink/ctnetlink Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30aa8e2ee214..85190de5710a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration" # connection tracking, helpers and protocols config IP_NF_CONNTRACK tristate "Connection tracking (required for masq/NAT)" + select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS IF unsure, say `N'. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + default IP_NF_CONNTRACK if NETFILTER_NETLINK=y + default m if NETFILTER_NETLINK=m + help + This option enables support for a netlink-based userspace interface + + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface' - depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - help - This option enables support for a netlink-based userspace interface - endmenu -- cgit v1.2.3 From 9eb0eec74d9425f4376df653304783339a027d79 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 17 Sep 2005 00:41:21 -0700 Subject: [NETFILTER] move nfnetlink options to right location in kconfig menu Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 2bdd5623fdd5..60f6f321bd76 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,6 +140,7 @@ config BRIDGE_NETFILTER If unsure, say N. +source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" @@ -206,8 +207,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -source "net/netfilter/Kconfig" - endmenu endmenu -- cgit v1.2.3 From ae31c3399d17b1f7bc1742724f70476b5417744f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:17:51 -0700 Subject: [DCCP]: Move the ack vector code to net/dccp/ackvec.[ch] Isolating it, that will be used when we introduce a CCID2 (TCP-Like) implementation. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 14 +- net/dccp/Makefile | 2 + net/dccp/ackvec.c | 419 ++++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/ackvec.h | 133 ++++++++++++++++ net/dccp/dccp.h | 82 ++-------- net/dccp/input.c | 69 ++------ net/dccp/ipv4.c | 59 ++----- net/dccp/minisocks.c | 13 +- net/dccp/options.c | 440 +-------------------------------------------------- net/dccp/output.c | 12 +- 10 files changed, 612 insertions(+), 631 deletions(-) create mode 100644 net/dccp/ackvec.c create mode 100644 net/dccp/ackvec.h (limited to 'net') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 0e72708677e4..8c8e029095a5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -353,14 +353,8 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) extern struct inet_timewait_death_row dccp_death_row; -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACK_VECTOR_LEN 253 - struct dccp_options_received { - u32 dccpor_ndp:24, - dccpor_ack_vector_len:8; - u32 dccpor_ack_vector_idx:10; - /* 22 bits hole, try to pack */ + u32 dccpor_ndp; /* only 24 bits */ u32 dccpor_timestamp; u32 dccpor_timestamp_echo; u32 dccpor_elapsed_time; @@ -394,6 +388,8 @@ static inline int dccp_list_has_service(const struct dccp_service_list *sl, return 0; } +struct dccp_ackvec; + /** * struct dccp_sock - DCCP socket state * @@ -414,7 +410,7 @@ static inline int dccp_list_has_service(const struct dccp_service_list *sl, * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_hc_rx_ackpkts - receiver half connection acked packets + * @dccps_hc_rx_ackvec - rx half connection ack vector */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -439,7 +435,7 @@ struct dccp_sock { __u32 dccps_pmtu_cookie; __u32 dccps_mss_cache; struct dccp_options dccps_options; - struct dccp_ackpkts *dccps_hc_rx_ackpkts; + struct dccp_ackvec *dccps_hc_rx_ackvec; void *dccps_hc_rx_ccid_private; void *dccps_hc_tx_ccid_private; struct ccid *dccps_hc_rx_ccid; diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb042455..344a8da153fc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o +dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o + obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 000000000000..6530283eafca --- /dev/null +++ b/net/dccp/ackvec.c @@ -0,0 +1,419 @@ +/* + * net/dccp/ackvec.c + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License; + */ + +#include "ackvec.h" +#include "dccp.h" + +#include +#include + +#include + +int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + int len = av->dccpav_vec_len + 2; + struct timeval now; + u32 elapsed_time; + unsigned char *to, *from; + + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = av->dccpav_vec_len; + from = av->dccpav_buf + av->dccpav_buf_head; + + /* Check if buf_head wraps */ + if (av->dccpav_buf_head + len > av->dccpav_vec_len) { + const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = av->dccpav_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. + * + * This implemention uses just one ack record for now. + */ + av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + av->dccpav_ack_ptr = av->dccpav_buf_head; + av->dccpav_ack_ackno = av->dccpav_buf_ackno; + av->dccpav_ack_nonce = av->dccpav_buf_nonce; + av->dccpav_sent_len = av->dccpav_vec_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + debug_prefix, av->dccpav_sent_len, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + return -1; +} + +struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, + const unsigned int __nocast priority) +{ + struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); + + if (av != NULL) { + av->dccpav_buf_len = len; + av->dccpav_buf_head = + av->dccpav_buf_tail = av->dccpav_buf_len - 1; + av->dccpav_buf_ackno = + av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; + av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; + av->dccpav_ack_ptr = 0; + av->dccpav_time.tv_sec = 0; + av->dccpav_time.tv_usec = 0; + av->dccpav_sent_len = av->dccpav_vec_len = 0; + } + + return av; +} + +void dccp_ackvec_free(struct dccp_ackvec *av) +{ + kfree(av); +} + +static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; +} + +static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (av->dccpav_vec_len + packets > av->dccpav_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = av->dccpav_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += av->dccpav_buf_len; + } + + av->dccpav_buf_head = new_head; + + if (gap > 0) + memset(av->dccpav_buf + av->dccpav_buf_head + 1, + DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); + + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpav_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. + */ + + /* See if this is the first ackno being inserted */ + if (av->dccpav_vec_len == 0) { + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len = 1; + } else if (after48(ackno, av->dccpav_buf_ackno)) { + const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, + ackno); + + /* + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackvec_state(av, av->dccpav_buf_head) == state && + (dccp_ackvec_len(av, av->dccpav_buf_head) < + DCCP_ACKVEC_LEN_MASK)) + av->dccpav_buf[av->dccpav_buf_head]++; + else if (dccp_ackvec_set_buf_head_state(av, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); + unsigned int index = av->dccpav_buf_head; + + while (1) { + const u8 len = dccp_ackvec_len(av, index); + const u8 state = dccp_ackvec_state(av, index); + /* + * valid packets not yet in dccpav_buf have a reserved + * entry, with a len equal to 0. + */ + if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long)ackno); + av->dccpav_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == av->dccpav_buf_len) + index = 0; + } + } + + av->dccpav_buf_ackno = ackno; + dccp_timestamp(sk, &av->dccpav_time); +out: + dccp_pr_debug(""); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long)ackno); + return -EILSEQ; +} + +#ifdef CONFIG_IP_DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackvec_print(const struct dccp_ackvec *av) +{ + dccp_ackvector_print(av->dccpav_buf_ackno, + av->dccpav_buf + av->dccpav_buf_head, + av->dccpav_vec_len); +} +#endif + +static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) +{ + /* + * As we're keeping track of the ack vector size (dccpav_vec_len) and + * the sent ack vector size (dccpav_sent_len) we don't need + * dccpav_buf_tail at all, but keep this code here as in the future + * we'll implement a vector of ack records, as suggested in + * draft-ietf-dccp-spec-11.txt Appendix A. -acme + */ +#if 0 + av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; + if (av->dccpav_buf_tail >= av->dccpav_vec_len) + av->dccpav_buf_tail -= av->dccpav_vec_len; +#endif + av->dccpav_vec_len -= av->dccpav_sent_len; +} + +void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, + const u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == av->dccpav_ack_seqno) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're + * not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ + if (before48(ackno, av->dccpav_ack_seqno)) { + /* dccp_pr_debug_cat("yes\n"); */ + return; + } + /* dccp_pr_debug_cat("no\n"); */ + + i = len; + while (i--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * av->dccpav_ack_seqno, ackno); + */ + if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ + + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + debug_prefix, len, + (unsigned long long) + av->dccpav_ack_seqno, + (unsigned long long) + av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + } + /* + * If dccpav_ack_seqno was not received, no problem + * we'll send another ACK vector. + */ + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + /* dccp_pr_debug_cat("no\n"); */ + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} + +int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + if (len > DCCP_MAX_ACKVEC_LEN) + return -1; + + /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ + dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + return 0; +} diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 000000000000..8ca51c9191f7 --- /dev/null +++ b/net/dccp/ackvec.h @@ -0,0 +1,133 @@ +#ifndef _ACKVEC_H +#define _ACKVEC_H +/* + * net/dccp/ackvec.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACKVEC_LEN 253 + +#define DCCP_ACKVEC_STATE_RECEIVED 0 +#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackvec - ack vector + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpav_buf_head - circular buffer head + * @dccpav_buf_tail - circular buffer tail + * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpav_buf_head) + * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpav_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) + * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpav_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) + * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpav_buf_len - circular buffer length + * @dccpav_time - the time in usecs + * @dccpav_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackvec { + unsigned int dccpav_buf_head; + unsigned int dccpav_buf_tail; + u64 dccpav_buf_ackno; + u64 dccpav_ack_seqno; + u64 dccpav_ack_ackno; + unsigned int dccpav_ack_ptr; + unsigned int dccpav_sent_len; + unsigned int dccpav_vec_len; + unsigned int dccpav_buf_len; + struct timeval dccpav_time; + u8 dccpav_buf_nonce; + u8 dccpav_ack_nonce; + u8 dccpav_buf[0]; +}; + +struct sock; +struct sk_buff; + +#ifdef CONFIG_IP_DCCP_ACKVEC +extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority); +extern void dccp_ackvec_free(struct dccp_ackvec *av); + +extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state); + +extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno); +extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len); + +extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return av->dccpav_sent_len != av->dccpav_vec_len; +} +#else /* CONFIG_IP_DCCP_ACKVEC */ +static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority) +{ + return NULL; +} + +static inline void dccp_ackvec_free(struct dccp_ackvec *av) +{ +} + +static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + return -1; +} + +static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno) +{ +} + +static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + return -1; +} + +static inline int dccp_insert_option_ackvec(const struct sock *sk, + const struct sk_buff *skb) +{ + return -1; +} + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return 0; +} +#endif /* CONFIG_IP_DCCP_ACKVEC */ +#endif /* _ACKVEC_H */ diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index be7a660b6b24..5871c027f9dc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -17,6 +17,7 @@ #include #include #include +#include "ackvec.h" #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; @@ -358,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); } + +static inline int dccp_ack_pending(const struct sock *sk) +{ + const struct dccp_sock *dp = dccp_sk(sk); + return dp->dccps_timestamp_echo != 0 || +#ifdef CONFIG_IP_DCCP_ACKVEC + (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || +#endif + inet_csk_ack_scheduled(sk); +} extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, @@ -371,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, extern struct socket *dccp_ctl_socket; -#define DCCP_ACKPKTS_STATE_RECEIVED 0 -#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) - -#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ - -/** struct dccp_ackpkts - acknowledgeable packets - * - * This data structure is the one defined in the DCCP draft - * Appendix A. - * - * @dccpap_buf_head - circular buffer head - * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: - * - * @dccpap_ack_seqno - the Sequence Number used for the packet - * (HC-Receiver seqno) - * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet - * (HC-Sender seqno) - * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. - * - * @dccpap_buf_len - circular buffer length - * @dccpap_time - the time in usecs - * @dccpap_buf - circular buffer of acknowledgeable packets - */ -struct dccp_ackpkts { - unsigned int dccpap_buf_head; - unsigned int dccpap_buf_tail; - u64 dccpap_buf_ackno; - u64 dccpap_ack_seqno; - u64 dccpap_ack_ackno; - unsigned int dccpap_ack_ptr; - unsigned int dccpap_buf_vector_len; - unsigned int dccpap_ack_vector_len; - unsigned int dccpap_buf_len; - struct timeval dccpap_time; - u8 dccpap_buf_nonce; - u8 dccpap_ack_nonce; - u8 dccpap_buf[0]; -}; - -extern struct dccp_ackpkts * - dccp_ackpkts_alloc(unsigned int len, - const unsigned int __nocast priority); -extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state); -extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno); - extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); static inline suseconds_t timeval_usecs(const struct timeval *tv) @@ -470,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -#ifdef CONFIG_IP_DCCP_DEBUG -extern void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, int len); -extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); -#else -static inline void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, - int len) { } -static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } -#endif - #endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 062e9f8359d0..1b6b2cb12376 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -16,6 +16,7 @@ #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_options.dccpo_send_ack_vector) - dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) @@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " - "packets buffer full!\n"); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - goto discard; - } - - /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) - */ - if (!inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); @@ -495,29 +470,11 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - dcb->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) - goto discard; - /* - * FIXME: this activation is probably wrong, have to - * study more TCP delack machinery and how it fits into - * DCCP draft, but for now it kinda "works" 8) - */ - if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == - DCCP_MAX_SEQNO + 1) && - !inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; } /* diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 94a440b2685b..82434e4a42df 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -23,6 +23,7 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -1112,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; dh = dccp_hdr(skb); -#if 0 - /* - * Use something like this to simulate some DATA/DATAACK loss to test - * dccp_ackpkts_add, you'll get something like this on a session that - * sends 10 DATA/DATAACK packets: - * - * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| - * - * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets - * with the same state - * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet - * - * So... - * - * 281473596467422 was received - * 281473596467421 was not received - * 281473596467420 was received - * 281473596467419 was not received - * 281473596467418 was received - * 281473596467417 was not received - * 281473596467416 was received - * 281473596467415 was not received - * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake - * RESPONSE) - * - */ - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) { - static int discard = 0; - if (discard) { - discard = 0; - goto discard_it; - } - discard = 1; - } -#endif DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; @@ -1264,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); - - if (dp->dccps_hc_rx_ackpkts == NULL) + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_KERNEL); + if (dp->dccps_hc_rx_ackvec == NULL) return -ENOMEM; } @@ -1288,8 +1249,10 @@ static int dccp_v4_init_sock(struct sock *sk) dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } @@ -1331,8 +1294,10 @@ static int dccp_v4_destroy_sock(struct sock *sk) ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 933e10db1789..1393461898bb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -19,6 +19,7 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -94,23 +95,23 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct dccp_sock *newdp = dccp_sk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; - newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackvec = + dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_ATOMIC); /* * XXX: We're using the same CCIDs set on the parent, * i.e. sk_clone copied the master sock and left the * CCID pointers for this child, that is why we do the * __ccid_get calls. */ - if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) goto out_free; } @@ -118,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newsk) != 0 || ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { - dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); out_free: diff --git a/net/dccp/options.c b/net/dccp/options.c index d4c4242d8dd7..c480c506a4a4 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -18,15 +18,10 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, - const u64 ackno, - const unsigned char len, - const unsigned char *vector); - /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, @@ -113,25 +108,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: - if (len > DCCP_MAX_ACK_VECTOR_LEN) - goto out_invalid_option; - + case DCCPO_ACK_VECTOR_1: if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_ack_vector_len = len; - opt_recv->dccpor_ack_vector_idx = value - options; - - dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, - value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, - sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_parse(sk, skb, opt, value, len)) + goto out_invalid_option; break; case DCCPO_TIMESTAMP: if (len != 4) @@ -352,86 +335,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT TX opt: " : "server TX opt: "; -#endif - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - int len = ap->dccpap_buf_vector_len + 2; - struct timeval now; - u32 elapsed_time; - unsigned char *to, *from; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; - - if (elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " - "insert ACK Vector!\n"); - return; - } - - /* - * XXX: now we have just one ack vector sent record, so - * we have to wait for it to be cleared. - * - * Of course this is not acceptable, but this is just for - * basic testing now. - */ - if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) - return; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - - len = ap->dccpap_buf_vector_len; - from = ap->dccpap_buf + ap->dccpap_buf_head; - - /* Check if buf_head wraps */ - if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = (ap->dccpap_buf_len - - ap->dccpap_buf_head); - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = ap->dccpap_buf; - } - - memcpy(to, from, len); - /* - * From draft-ietf-dccp-spec-11.txt: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. - * - * This implemention uses just one ack record for now. - */ - ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - ap->dccpap_ack_ptr = ap->dccpap_buf_head; - ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; - ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; - ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - debug_prefix, ap->dccpap_ack_vector_len, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); -} - void dccp_timestamp(const struct sock *sk, struct timeval *tv) { const struct dccp_sock *dp = dccp_sk(sk); @@ -528,9 +431,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != - DCCP_MAX_SEQNO + 1)) - dccp_insert_option_ack_vector(sk, skb); + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) + dccp_insert_option_ackvec(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } @@ -557,331 +459,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } } - -struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, - const unsigned int __nocast priority) -{ - struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); - - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap->dccpap_buf, 0xFF, len); -#endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = - ap->dccpap_buf_tail = - ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = - ap->dccpap_ack_ackno = - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time.tv_sec = 0; - ap->dccpap_time.tv_usec = 0; - ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; - } - - return ap; -} - -void dccp_ackpkts_free(struct dccp_ackpkts *ap) -{ - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); -#endif - kfree(ap); - } -} - -static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; -} - -static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; -} - -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. - */ -static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, - const unsigned int packets, - const unsigned char state) -{ - unsigned int gap; - signed long new_head; - - if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) - return -ENOBUFS; - - gap = packets - 1; - new_head = ap->dccpap_buf_head - packets; - - if (new_head < 0) { - if (gap > 0) { - memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; - } - new_head += ap->dccpap_buf_len; - } - - ap->dccpap_buf_head = new_head; - - if (gap > 0) - memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, - DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); - - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len += packets; - return 0; -} - -/* - * Implements the draft-ietf-dccp-spec-11.txt Appendix A - */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state) -{ - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in dccpap_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A: - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ - - /* See if this is the first ackno being inserted */ - if (ap->dccpap_buf_vector_len == 0) { - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len = 1; - } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, - ackno); - - /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. - */ - if (delta == 1 && - dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < - DCCP_ACKPKTS_LEN_MASK)) - ap->dccpap_buf[ap->dccpap_buf_head]++; - else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S arrives, and - * S <= buf_ackno, the HC-Receiver will scan the table - * for the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); - unsigned int index = ap->dccpap_buf_head; - - while (1) { - const u8 len = dccp_ackpkts_len(ap, index); - const u8 state = dccp_ackpkts_state(ap, index); - /* - * valid packets not yet in dccpap_buf have a reserved - * entry, with a len equal to 0. - */ - if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long) ackno); - ap->dccpap_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == ap->dccpap_buf_len) - index = 0; - } - } - - ap->dccpap_buf_ackno = ackno; - dccp_timestamp(sk, &ap->dccpap_time); -out: - dccp_pr_debug(""); - dccp_ackpkts_print(ap); - return 0; - -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long) ackno); - return -EILSEQ; -} - -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, - int len) -{ - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long) ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - - printk("%d,%d|", state, rl); - ++vector; - } - - printk("\n"); -} - -void dccp_ackpkts_print(const struct dccp_ackpkts *ap) -{ - dccp_ackvector_print(ap->dccpap_buf_ackno, - ap->dccpap_buf + ap->dccpap_buf_head, - ap->dccpap_buf_vector_len); -} -#endif - -static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) -{ - /* - * As we're keeping track of the ack vector size - * (dccpap_buf_vector_len) and the sent ack vector size - * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of - * ack records, as suggested in draft-ietf-dccp-spec-11.txt - * Appendix A. -acme - */ -#if 0 - ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; - if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) - ap->dccpap_buf_tail -= ap->dccpap_buf_len; -#endif - ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; -} - -void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, - u64 ackno) -{ - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - - if (ackno == ap->dccpap_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - } -} - -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno, - const unsigned char len, - const unsigned char *vector) -{ - unsigned char i; - - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - /* - * We're in the receiver half connection, so if the received an ACK - * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're - * not interested. - * - * Extra explanation with example: - * - * if we received an ACK vector with ackno 50, it can only be acking - * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). - */ - /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ - if (before48(ackno, ap->dccpap_ack_seqno)) { - /* dccp_pr_debug_cat("yes\n"); */ - return; - } - /* dccp_pr_debug_cat("no\n"); */ - - i = len; - while (i--) { - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - u64 ackno_end_rl; - - dccp_set_seqno(&ackno_end_rl, ackno - rl); - - /* - * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, - * ap->dccpap_ack_seqno, ackno); - */ - if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKPKTS_STATE_MASK) >> 6; - /* dccp_pr_debug_cat("yes\n"); */ - - if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - debug_prefix, len, - (unsigned long long) - ap->dccpap_ack_seqno, - (unsigned long long) - ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - } - /* - * If dccpap_ack_seqno was not received, no problem - * we'll send another ACK vector. - */ - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - break; - } - /* dccp_pr_debug_cat("no\n"); */ - - dccp_set_seqno(&ackno, ackno_end_rl - 1); - ++vector; - } -} diff --git a/net/dccp/output.c b/net/dccp/output.c index 156b1d29a156..4786bdcddcc9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -16,6 +16,7 @@ #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int len = skb->len; @@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); dcb->dccpd_type = DCCP_PKT_DATAACK; - /* - * FIXME: we really should have a - * dccps_ack_pending or use icsk. - */ - } else if (inet_csk_ack_scheduled(sk) || - dp->dccps_timestamp_echo != 0 || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + } else if (dccp_ack_pending(sk)) dcb->dccpd_type = DCCP_PKT_DATAACK; else dcb->dccpd_type = DCCP_PKT_DATA; -- cgit v1.2.3 From 65299d6c3cfb49cc3eee4fc483e7edd23ea7b2ed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:18:32 -0700 Subject: [CCID3]: Introduce include/linux/tfrc.h Moving the TFRC sender and receiver variables to separate structs, so that we can copy these structs to userspace thru getsockopt, dccp_diag, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tfrc.h | 35 +++++++++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.h | 23 +++++++++++++---------- 2 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 include/linux/tfrc.h (limited to 'net') diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h new file mode 100644 index 000000000000..7dab7831c3cb --- /dev/null +++ b/include/linux/tfrc.h @@ -0,0 +1,35 @@ +#ifndef _LINUX_TFRC_H_ +#define _LINUX_TFRC_H_ +/* + * include/linux/tfrc.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +struct tfrc_rx_info { + __u32 tfrcrx_x_recv; + __u32 tfrcrx_rtt; + __u32 tfrcrx_p; +}; + +struct tfrc_tx_info { + __u32 tfrctx_x; + __u32 tfrctx_x_recv; + __u32 tfrctx_x_calc; + __u32 tfrctx_rtt; + __u32 tfrctx_p; + __u32 tfrctx_rto; + __u32 tfrctx_ipi; +}; + +#endif /* _LINUX_TFRC_H_ */ diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index eb248778eea3..0bde4583d091 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -40,6 +40,7 @@ #include #include #include +#include #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -93,12 +94,15 @@ struct ccid3_options_received { * @ccid3hctx_hist - Packet history */ struct ccid3_hc_tx_sock { - u32 ccid3hctx_x; - u32 ccid3hctx_x_recv; - u32 ccid3hctx_x_calc; + struct tfrc_tx_info ccid3hctx_tfrc; +#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x +#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv +#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc +#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt +#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p +#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto +#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u32 ccid3hctx_rtt; - u32 ccid3hctx_p; u8 ccid3hctx_state; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; @@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; - u32 ccid3hctx_t_rto; - u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; struct ccid3_hc_rx_sock { + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - u32 ccid3hcrx_rtt; - u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; @@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock { u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; - u32 ccid3hcrx_x_recv; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -- cgit v1.2.3 From 561713cf475de1f671cc89c437927ec008a20209 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:18:52 -0700 Subject: [DCCP]: Don't use necessarily the same CCID for tx and rx Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 3 ++- net/dccp/ipv4.c | 4 ++-- net/dccp/options.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 8c8e029095a5..13f9b78483fc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -330,7 +330,8 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_options { __u64 dccpo_sequence_window; - __u8 dccpo_ccid; + __u8 dccpo_rx_ccid; + __u8 dccpo_tx_ccid; __u8 dccpo_send_ack_vector; __u8 dccpo_send_ndp_count; }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 82434e4a42df..40fe6afacde6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1241,9 +1241,9 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { diff --git a/net/dccp/options.c b/net/dccp/options.c index c480c506a4a4..0a76426c9aea 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -25,7 +25,8 @@ /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, - .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_rx_ccid = DCCPF_INITIAL_CCID, + .dccpo_tx_ccid = DCCPF_INITIAL_CCID, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; -- cgit v1.2.3 From 88f964db6ef728982734356bf4c406270ea29c1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:19:32 -0700 Subject: [DCCP]: Introduce CCID getsockopt for the CCIDs Allocation for the optnames is similar to the DCCP options, with a range for rx and tx half connection CCIDs. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 ++ net/dccp/ccid.h | 31 ++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/proto.c | 17 +++++++++------ 4 files changed, 100 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 13f9b78483fc..71fab4311e92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -179,6 +179,8 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 #define DCCP_SOCKOPT_SERVICE 2 +#define DCCP_SOCKOPT_CCID_RX_INFO 128 +#define DCCP_SOCKOPT_CCID_TX_INFO 192 #define DCCP_SERVICE_LIST_MAX_LEN 32 diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7e..21e55142dcd3 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -14,6 +14,7 @@ */ #include +#include #include #include #include @@ -54,6 +55,14 @@ struct ccid { struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); + int (*ccid_hc_rx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); + int (*ccid_hc_tx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); }; extern int ccid_register(struct ccid *ccid); @@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_tx_get_info != NULL) ccid->ccid_hc_tx_get_info(sk, info); } + +static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_rx_getsockopt != NULL) + rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} + +static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_tx_getsockopt != NULL) + rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 38aa84986118..aa68e0ab274d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = hctx->ccid3hctx_rtt; } +static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_RX_INFO: + if (len < sizeof(hcrx->ccid3hcrx_tfrc)) + return -EINVAL; + len = sizeof(hcrx->ccid3hcrx_tfrc); + val = &hcrx->ccid3hcrx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + +static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(hctx->ccid3hctx_tfrc)) + return -EINVAL; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -1139,6 +1193,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, + .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, + .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9bda2868eba6..a1cfd0e9e3bc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -325,12 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - if (optname == DCCP_SOCKOPT_SERVICE) - return dccp_getsockopt_service(sk, len, - (u32 __user *)optval, optlen); - - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) + if (len < sizeof(int)) return -EINVAL; dp = dccp_sk(sk); @@ -338,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: val = dp->dccps_packet_size; + len = sizeof(dp->dccps_packet_size); break; + case DCCP_SOCKOPT_SERVICE: + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + case 128 ... 191: + return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); + case 192 ... 255: + return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 987905ded3d19c196dae25cac46c569cac9594b8 Mon Sep 17 00:00:00 2001 From: Mitsuru KANDA Date: Sun, 18 Sep 2005 00:30:08 -0700 Subject: [IPV6]: Check connect(2) status for IPv6 UDP socket (Re: xfrm_lookup) I think we should cache the per-socket route(dst_entry) only when the IPv6 UDP socket is connect(2)'ed. (which is same as IPv4 UDP send behavior) Signed-off-by: Mitsuru KANDA Signed-off-by: David S. Miller --- net/ipv6/udp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2b9bf9bd177f..6001948600f3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -639,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; + int connected = 0; /* destination address check */ if (sin6) { @@ -748,6 +749,7 @@ do_udp_sendmsg: fl->fl_ip_dport = inet->dport; daddr = &np->daddr; fl->fl6_flowlabel = np->flow_label; + connected = 1; } if (!fl->oif) @@ -770,6 +772,7 @@ do_udp_sendmsg: } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; + connected = 0; } if (opt == NULL) opt = np->opt; @@ -787,10 +790,13 @@ do_udp_sendmsg: ipv6_addr_copy(&final, &fl->fl6_dst); ipv6_addr_copy(&fl->fl6_dst, rt0->addr); final_p = &final; + connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) + if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { fl->oif = np->mcast_oif; + connected = 0; + } err = ip6_dst_lookup(sk, &dst, fl); if (err) @@ -846,7 +852,7 @@ do_append_data: else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - if (dst) + if (dst && connected) ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? &np->daddr : NULL); -- cgit v1.2.3 From 628f87f3d585bd0c2b0e39df039585d7a5831cc9 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 18 Sep 2005 00:33:02 -0700 Subject: [NETFILTER]: Solve Kconfig dependency problem As suggested by Roman Zippel. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 85190de5710a..e2162d270073 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -8,7 +8,6 @@ menu "IP: Netfilter Configuration" # connection tracking, helpers and protocols config IP_NF_CONNTRACK tristate "Connection tracking (required for masq/NAT)" - select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -55,8 +54,7 @@ config IP_NF_CONNTRACK_EVENTS config IP_NF_CONNTRACK_NETLINK tristate 'Connection tracking netlink interface' depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - default IP_NF_CONNTRACK if NETFILTER_NETLINK=y - default m if NETFILTER_NETLINK=m + depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m help This option enables support for a netlink-based userspace interface -- cgit v1.2.3 From 1d25cd6cc2528e4af12ab18e84fe95ed78f3f21a Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Mon, 19 Sep 2005 15:29:52 -0700 Subject: [IPV4]: fib_trie tnode stats refinements * Prints the route tnode and set the stats level deepth as before. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1b63b4824164..3dd70787081e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2029,7 +2029,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, iter->tnode = (struct tnode *) n; iter->trie = t; iter->index = 0; - iter->depth = 0; + iter->depth = 1; return n; } return NULL; @@ -2274,11 +2274,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_puts(seq, ":\n"); else seq_puts(seq, "
:\n"); - } else { - seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", - NIPQUAD(prf), tn->pos); - } + } + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", + NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + tn->empty_children); + } else { struct leaf *l = (struct leaf *) n; int i; -- cgit v1.2.3 From 772cb712b1373d335ef2874ea357ec681edc754b Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Mon, 19 Sep 2005 15:31:18 -0700 Subject: [IPV4]: fib_trie RCU refinements * This patch is from Paul McKenney's RCU reviewing. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3dd70787081e..90ae70870a10 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.403" +#define VERSION "0.404" #include #include @@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) Consider a node 'n' and its parent 'tp'. If n is a leaf, every bit in its key is significant. Its presence is - necessitaded by path compression, since during a tree traversal (when + necessitated by path compression, since during a tree traversal (when searching for a leaf - unless we are doing an insertion) we will completely ignore all skipped bits we encounter. Thus we need to verify, at the end of a potentially successful search, that we have indeed been walking the @@ -836,11 +836,12 @@ static void trie_init(struct trie *t) #endif } -/* readside most use rcu_read_lock currently dump routines +/* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) +static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { + struct hlist_head *head = &l->list; struct hlist_node *node; struct leaf_info *li; @@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) static inline struct list_head * get_fa_head(struct leaf *l, int plen) { - struct leaf_info *li = find_leaf_info(&l->list, plen); + struct leaf_info *li = find_leaf_info(l, plen); if (!li) return NULL; @@ -1248,7 +1249,7 @@ err: } -/* should be clalled with rcu_read_lock */ +/* should be called with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res) @@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); l = fib_find_node(t, key); - li = find_leaf_info(&l->list, plen); + li = find_leaf_info(l, plen); list_del_rcu(&fa->fa_list); @@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; - rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb) trie_leaf_remove(t, ll->key); ll = l; } - rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -2288,7 +2287,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth); seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); for (i = 32; i >= 0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); if (li) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2384,7 +2383,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; for (i=32; i>=0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); struct fib_alias *fa; u32 mask, prefix; -- cgit v1.2.3 From 926b50f92a30090da2c1a8675de954c2d9b09732 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 19 Sep 2005 15:33:08 -0700 Subject: [NETFILTER]: Add new PPTP conntrack and NAT helper This new "version 3" PPTP conntrack/nat helper is finally ready for mainline inclusion. Special thanks to lots of last-minute bugfixing by Patric McHardy. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 12 + include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 332 +++++++++ .../linux/netfilter_ipv4/ip_conntrack_proto_gre.h | 114 +++ include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 7 + include/linux/netfilter_ipv4/ip_nat_pptp.h | 11 + net/ipv4/netfilter/Kconfig | 22 + net/ipv4/netfilter/Makefile | 5 + net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 805 +++++++++++++++++++++ net/ipv4/netfilter/ip_conntrack_proto_gre.c | 327 +++++++++ net/ipv4/netfilter/ip_nat_helper_pptp.c | 401 ++++++++++ net/ipv4/netfilter/ip_nat_proto_gre.c | 214 ++++++ 11 files changed, 2250 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_pptp.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_pptp.h create mode 100644 net/ipv4/netfilter/ip_conntrack_helper_pptp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_gre.c create mode 100644 net/ipv4/netfilter/ip_nat_helper_pptp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_gre.c (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 7e033e9271a8..2df446c952ef 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -133,11 +133,13 @@ enum ip_conntrack_expect_events { #include #include +#include #include /* per conntrack: protocol private data */ union ip_conntrack_proto { /* insert conntrack proto private data here */ + struct ip_ct_gre gre; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct ip_ct_icmp icmp; @@ -148,6 +150,7 @@ union ip_conntrack_expect_proto { }; /* Add protocol helper include file here */ +#include #include #include #include @@ -155,12 +158,20 @@ union ip_conntrack_expect_proto { /* per conntrack: application helper private data */ union ip_conntrack_help { /* insert conntrack helper private data (master) here */ + struct ip_ct_pptp_master ct_pptp_info; struct ip_ct_ftp_master ct_ftp_info; struct ip_ct_irc_master ct_irc_info; }; #ifdef CONFIG_IP_NF_NAT_NEEDED #include +#include + +/* per conntrack: nat application helper private data */ +union ip_conntrack_nat_help { + /* insert nat helper private data here */ + struct ip_nat_pptp nat_pptp_info; +}; #endif #include @@ -223,6 +234,7 @@ struct ip_conntrack #ifdef CONFIG_IP_NF_NAT_NEEDED struct { struct ip_nat_info info; + union ip_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) int masq_index; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h new file mode 100644 index 000000000000..389e3851d52f --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -0,0 +1,332 @@ +/* PPTP constants and structs */ +#ifndef _CONNTRACK_PPTP_H +#define _CONNTRACK_PPTP_H + +/* state of the control session */ +enum pptp_ctrlsess_state { + PPTP_SESSION_NONE, /* no session present */ + PPTP_SESSION_ERROR, /* some session error */ + PPTP_SESSION_STOPREQ, /* stop_sess request seen */ + PPTP_SESSION_REQUESTED, /* start_sess request seen */ + PPTP_SESSION_CONFIRMED, /* session established */ +}; + +/* state of the call inside the control session */ +enum pptp_ctrlcall_state { + PPTP_CALL_NONE, + PPTP_CALL_ERROR, + PPTP_CALL_OUT_REQ, + PPTP_CALL_OUT_CONF, + PPTP_CALL_IN_REQ, + PPTP_CALL_IN_REP, + PPTP_CALL_IN_CONF, + PPTP_CALL_CLEAR_REQ, +}; + + +/* conntrack private data */ +struct ip_ct_pptp_master { + enum pptp_ctrlsess_state sstate; /* session state */ + + /* everything below is going to be per-expectation in newnat, + * since there could be more than one call within one session */ + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC, host byte order */ + u_int16_t pns_call_id; /* call id of PNS, host byte order */ + + /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack + * and therefore imposes a fixed limit on the number of maps */ + struct ip_ct_gre_keymap *keymap_orig, *keymap_reply; +}; + +/* conntrack_expect private member */ +struct ip_ct_pptp_expect { + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC */ + u_int16_t pns_call_id; /* call id of PNS */ +}; + + +#ifdef __KERNEL__ + +#define IP_CONNTR_PPTP PPTP_CONTROL_PORT + +#define PPTP_CONTROL_PORT 1723 + +#define PPTP_PACKET_CONTROL 1 +#define PPTP_PACKET_MGMT 2 + +#define PPTP_MAGIC_COOKIE 0x1a2b3c4d + +struct pptp_pkt_hdr { + __u16 packetLength; + __u16 packetType; + __u32 magicCookie; +}; + +/* PptpControlMessageType values */ +#define PPTP_START_SESSION_REQUEST 1 +#define PPTP_START_SESSION_REPLY 2 +#define PPTP_STOP_SESSION_REQUEST 3 +#define PPTP_STOP_SESSION_REPLY 4 +#define PPTP_ECHO_REQUEST 5 +#define PPTP_ECHO_REPLY 6 +#define PPTP_OUT_CALL_REQUEST 7 +#define PPTP_OUT_CALL_REPLY 8 +#define PPTP_IN_CALL_REQUEST 9 +#define PPTP_IN_CALL_REPLY 10 +#define PPTP_IN_CALL_CONNECT 11 +#define PPTP_CALL_CLEAR_REQUEST 12 +#define PPTP_CALL_DISCONNECT_NOTIFY 13 +#define PPTP_WAN_ERROR_NOTIFY 14 +#define PPTP_SET_LINK_INFO 15 + +#define PPTP_MSG_MAX 15 + +/* PptpGeneralError values */ +#define PPTP_ERROR_CODE_NONE 0 +#define PPTP_NOT_CONNECTED 1 +#define PPTP_BAD_FORMAT 2 +#define PPTP_BAD_VALUE 3 +#define PPTP_NO_RESOURCE 4 +#define PPTP_BAD_CALLID 5 +#define PPTP_REMOVE_DEVICE_ERROR 6 + +struct PptpControlHeader { + __u16 messageType; + __u16 reserved; +}; + +/* FramingCapability Bitmap Values */ +#define PPTP_FRAME_CAP_ASYNC 0x1 +#define PPTP_FRAME_CAP_SYNC 0x2 + +/* BearerCapability Bitmap Values */ +#define PPTP_BEARER_CAP_ANALOG 0x1 +#define PPTP_BEARER_CAP_DIGITAL 0x2 + +struct PptpStartSessionRequest { + __u16 protocolVersion; + __u8 reserved1; + __u8 reserved2; + __u32 framingCapability; + __u32 bearerCapability; + __u16 maxChannels; + __u16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStartSessionResultCode Values */ +#define PPTP_START_OK 1 +#define PPTP_START_GENERAL_ERROR 2 +#define PPTP_START_ALREADY_CONNECTED 3 +#define PPTP_START_NOT_AUTHORIZED 4 +#define PPTP_START_UNKNOWN_PROTOCOL 5 + +struct PptpStartSessionReply { + __u16 protocolVersion; + __u8 resultCode; + __u8 generalErrorCode; + __u32 framingCapability; + __u32 bearerCapability; + __u16 maxChannels; + __u16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStopReasons */ +#define PPTP_STOP_NONE 1 +#define PPTP_STOP_PROTOCOL 2 +#define PPTP_STOP_LOCAL_SHUTDOWN 3 + +struct PptpStopSessionRequest { + __u8 reason; +}; + +/* PptpStopSessionResultCode */ +#define PPTP_STOP_OK 1 +#define PPTP_STOP_GENERAL_ERROR 2 + +struct PptpStopSessionReply { + __u8 resultCode; + __u8 generalErrorCode; +}; + +struct PptpEchoRequest { + __u32 identNumber; +}; + +/* PptpEchoReplyResultCode */ +#define PPTP_ECHO_OK 1 +#define PPTP_ECHO_GENERAL_ERROR 2 + +struct PptpEchoReply { + __u32 identNumber; + __u8 resultCode; + __u8 generalErrorCode; + __u16 reserved; +}; + +/* PptpFramingType */ +#define PPTP_ASYNC_FRAMING 1 +#define PPTP_SYNC_FRAMING 2 +#define PPTP_DONT_CARE_FRAMING 3 + +/* PptpCallBearerType */ +#define PPTP_ANALOG_TYPE 1 +#define PPTP_DIGITAL_TYPE 2 +#define PPTP_DONT_CARE_BEARER_TYPE 3 + +struct PptpOutCallRequest { + __u16 callID; + __u16 callSerialNumber; + __u32 minBPS; + __u32 maxBPS; + __u32 bearerType; + __u32 framingType; + __u16 packetWindow; + __u16 packetProcDelay; + __u16 reserved1; + __u16 phoneNumberLength; + __u16 reserved2; + __u8 phoneNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpCallResultCode */ +#define PPTP_OUTCALL_CONNECT 1 +#define PPTP_OUTCALL_GENERAL_ERROR 2 +#define PPTP_OUTCALL_NO_CARRIER 3 +#define PPTP_OUTCALL_BUSY 4 +#define PPTP_OUTCALL_NO_DIAL_TONE 5 +#define PPTP_OUTCALL_TIMEOUT 6 +#define PPTP_OUTCALL_DONT_ACCEPT 7 + +struct PptpOutCallReply { + __u16 callID; + __u16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 causeCode; + __u32 connectSpeed; + __u16 packetWindow; + __u16 packetProcDelay; + __u32 physChannelID; +}; + +struct PptpInCallRequest { + __u16 callID; + __u16 callSerialNumber; + __u32 callBearerType; + __u32 physChannelID; + __u16 dialedNumberLength; + __u16 dialingNumberLength; + __u8 dialedNumber[64]; + __u8 dialingNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpInCallResultCode */ +#define PPTP_INCALL_ACCEPT 1 +#define PPTP_INCALL_GENERAL_ERROR 2 +#define PPTP_INCALL_DONT_ACCEPT 3 + +struct PptpInCallReply { + __u16 callID; + __u16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 packetWindow; + __u16 packetProcDelay; + __u16 reserved; +}; + +struct PptpInCallConnected { + __u16 peersCallID; + __u16 reserved; + __u32 connectSpeed; + __u16 packetWindow; + __u16 packetProcDelay; + __u32 callFramingType; +}; + +struct PptpClearCallRequest { + __u16 callID; + __u16 reserved; +}; + +struct PptpCallDisconnectNotify { + __u16 callID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 causeCode; + __u16 reserved; + __u8 callStatistics[128]; +}; + +struct PptpWanErrorNotify { + __u16 peersCallID; + __u16 reserved; + __u32 crcErrors; + __u32 framingErrors; + __u32 hardwareOverRuns; + __u32 bufferOverRuns; + __u32 timeoutErrors; + __u32 alignmentErrors; +}; + +struct PptpSetLinkInfo { + __u16 peersCallID; + __u16 reserved; + __u32 sendAccm; + __u32 recvAccm; +}; + + +struct pptp_priv_data { + __u16 call_id; + __u16 mcall_id; + __u16 pcall_id; +}; + +union pptp_ctrl_union { + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; +}; + +extern int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, + struct ip_conntrack_expect *exp_reply); + +extern void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); +#endif /* __KERNEL__ */ +#endif /* _CONNTRACK_PPTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h new file mode 100644 index 000000000000..8d090ef82f5f --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -0,0 +1,114 @@ +#ifndef _CONNTRACK_PROTO_GRE_H +#define _CONNTRACK_PROTO_GRE_H +#include + +/* GRE PROTOCOL HEADER */ + +/* GRE Version field */ +#define GRE_VERSION_1701 0x0 +#define GRE_VERSION_PPTP 0x1 + +/* GRE Protocol field */ +#define GRE_PROTOCOL_PPTP 0x880B + +/* GRE Flags */ +#define GRE_FLAG_C 0x80 +#define GRE_FLAG_R 0x40 +#define GRE_FLAG_K 0x20 +#define GRE_FLAG_S 0x10 +#define GRE_FLAG_A 0x80 + +#define GRE_IS_C(f) ((f)&GRE_FLAG_C) +#define GRE_IS_R(f) ((f)&GRE_FLAG_R) +#define GRE_IS_K(f) ((f)&GRE_FLAG_K) +#define GRE_IS_S(f) ((f)&GRE_FLAG_S) +#define GRE_IS_A(f) ((f)&GRE_FLAG_A) + +/* GRE is a mess: Four different standards */ +struct gre_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rec:3, + srr:1, + seq:1, + key:1, + routing:1, + csum:1, + version:3, + reserved:4, + ack:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 csum:1, + routing:1, + key:1, + seq:1, + srr:1, + rec:3, + ack:1, + reserved:4, + version:3; +#else +#error "Adjust your defines" +#endif + __u16 protocol; +}; + +/* modified GRE header for PPTP */ +struct gre_hdr_pptp { + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __u16 payload_len; /* size of ppp payload, not inc. gre header */ + __u16 call_id; /* peer's call_id for this session */ + __u32 seq; /* sequence number. Present if S==1 */ + __u32 ack; /* seq number of highest packet recieved by */ + /* sender in this session */ +}; + + +/* this is part of ip_conntrack */ +struct ip_ct_gre { + unsigned int stream_timeout; + unsigned int timeout; +}; + +#ifdef __KERNEL__ +struct ip_conntrack_expect; +struct ip_conntrack; + +/* structure for original <-> reply keymap */ +struct ip_ct_gre_keymap { + struct list_head list; + + struct ip_conntrack_tuple tuple; +}; + +/* add new tuple->key_reply pair to keymap */ +int ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, + int reply); + +/* delete keymap entries */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); + + +/* get pointer to gre key, if present */ +static inline u_int32_t *gre_key(struct gre_hdr *greh) +{ + if (!greh->key) + return NULL; + if (greh->csum || greh->routing) + return (u_int32_t *) (greh+sizeof(*greh)+4); + return (u_int32_t *) (greh+sizeof(*greh)); +} + +/* get pointer ot gre csum, if present */ +static inline u_int16_t *gre_csum(struct gre_hdr *greh) +{ + if (!greh->csum) + return NULL; + return (u_int16_t *) (greh+sizeof(*greh)); +} + +#endif /* __KERNEL__ */ + +#endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index c33f0b5e0d0a..14dc0f7b6556 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -28,6 +28,9 @@ union ip_conntrack_manip_proto struct { u_int16_t port; } sctp; + struct { + u_int16_t key; /* key is 32bit, pptp only uses 16 */ + } gre; }; /* The manipulable part of the tuple. */ @@ -61,6 +64,10 @@ struct ip_conntrack_tuple struct { u_int16_t port; } sctp; + struct { + u_int16_t key; /* key is 32bit, + * pptp only uses 16 */ + } gre; } u; /* The protocol. */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h new file mode 100644 index 000000000000..eaf66c2e8f93 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -0,0 +1,11 @@ +/* PPTP constants and structs */ +#ifndef _NAT_PPTP_H +#define _NAT_PPTP_H + +/* conntrack private data */ +struct ip_nat_pptp { + u_int16_t pns_call_id; /* NAT'ed PNS call id */ + u_int16_t pac_call_id; /* NAT'ed PAC call id */ +}; + +#endif /* _NAT_PPTP_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e2162d270073..3cf9b451675c 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -137,6 +137,22 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_PPTP + tristate 'PPTP protocol support' + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) conncection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + For more info, read top of the file + net/ipv4/netfilter/ip_conntrack_pptp.c + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -621,6 +637,12 @@ config IP_NF_NAT_AMANDA default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config IP_NF_NAT_PPTP + tristate + depends on IP_NF_NAT!=n && IP_NF_PPTP!=n + default IP_NF_NAT if IP_NF_PPTP=y + default m if IP_NF_PPTP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1ba0db746817..3d45d3c0283c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -6,6 +6,9 @@ ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o +ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o + # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o @@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o @@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers +obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 000000000000..79db5b70d5f6 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -0,0 +1,805 @@ +/* + * ip_conntrack_pptp.c - Version 3.0 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFFC2673 + * - We can only support one single call within each session + * + * TODO: + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_destroy_siblings() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig ) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * 2004-10-22 - Version 2.0 + * - merge Mandrake's 2.6.x port with recent 2.6.x API changes + * - fix lots of linear skb assumptions from Mandrake's port + * 2005-06-10 - Version 2.1 + * - use ip_conntrack_expect_free() instead of kfree() on the + * expect's (which are from the slab for quite some time) + * 2005-06-10 - Version 3.0 + * - port helper to post-2.6.11 API changes, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * 2005-07-30 - Version 3.1 + * - port helper to 2.6.13 API changes + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define IP_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +static DEFINE_SPINLOCK(ip_pptp_lock); + +int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply); + +void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + if (!ip_nat_pptp_hook_expectfn) { + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&inv_t); + + exp_other = ip_conntrack_expect_find(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + ip_conntrack_unexpect_related(exp_other); + ip_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } else { + /* we need more than simple inversion */ + ip_nat_pptp_hook_expectfn(ct, exp); + } +} + +static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_expect *exp; + + DEBUGP("trying to timeout ct or exp for tuple "); + DUMP_TUPLE(t); + + h = ip_conntrack_find_get(t, NULL); + if (h) { + struct ip_conntrack *sibling = tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + /* refresh_acct will not modify counters if skb == NULL */ + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + ip_conntrack_put(sibling); + return 1; + } else { + exp = ip_conntrack_expect_find(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + ip_conntrack_unexpect_related(exp); + ip_conntrack_expect_put(exp); + return 1; + } + } + + return 0; +} + + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct ip_conntrack *ct) +{ + struct ip_conntrack_tuple t; + + /* Since ct->sibling_list has literally rusted away in 2.6.11, + * we now need another way to find out about our sibling + * contrack and expects... -HW */ + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + u_int16_t callid, + u_int16_t peer_callid) +{ + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; + struct ip_conntrack_expect *exp_orig, *exp_reply; + int ret = 1; + + exp_orig = ip_conntrack_expect_alloc(master); + if (exp_orig == NULL) + goto out; + + exp_reply = ip_conntrack_expect_alloc(master); + if (exp_reply == NULL) + goto out_put_orig; + + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + + exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; + exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.protonum = 0xff; + + exp_orig->master = master; + exp_orig->expectfn = pptp_expectfn; + exp_orig->flags = 0; + + exp_orig->dir = IP_CT_DIR_ORIGINAL; + + /* both expectations are identical apart from tuple */ + memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + exp_reply->dir = !exp_orig->dir; + + if (ip_nat_pptp_hook_exp_gre) + ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; + } + +out_put_both: + ip_conntrack_expect_put(exp_reply); +out_put_orig: + ip_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + ip_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + ip_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } + + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*cid)); + break; + } + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms disconnect */ + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; + } + + + if (ip_nat_pptp_hook_inbound) + return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + if (ip_nat_pptp_hook_outbound) + return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr _pptph, *pptph; + struct tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + unsigned int nexthdr_off; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + nexthdr_off = (*pskb)->nh.iph->ihl*4; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xffff } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xff + } + }, + .help = conntrack_pptp_help +}; + +extern void __exit ip_ct_proto_gre_fini(void); +extern int __init ip_ct_proto_gre_init(void); + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + retcode = ip_ct_proto_gre_init(); + if (retcode < 0) + return retcode; + + DEBUGP(" registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + ip_ct_proto_gre_fini(); + return retcode; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + ip_ct_proto_gre_fini(); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); +EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 000000000000..de3cb9db6f85 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -0,0 +1,327 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) +#else +#define DEBUGP(x, args...) +#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key = 0; + + read_lock_bh(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; + read_unlock_bh(&ip_ct_gre_lock); + + DEBUGP("lookup src key 0x%x up key for ", key); + DUMP_TUPLE_GRE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int +ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap **exist_km, *km, *old; + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to add GRE keymap to non-pptp session\n"); + return -1; + } + + if (!reply) + exist_km = &ct->help.ct_pptp_info.keymap_orig; + else + exist_km = &ct->help.ct_pptp_info.keymap_reply; + + if (*exist_km) { + /* check whether it's a retransmission */ + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; + } + + DEBUGP("trying to override keymap_%s for ct %p\n", + reply? "reply":"orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + + memcpy(&km->tuple, t, sizeof(*t)); + *exist_km = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + write_lock_bh(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + write_unlock_bh(&ip_ct_gre_lock); + + return 0; +} + +/* destroy the keymap entries associated with specified master ct */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) +{ + DEBUGP("entering for ct %p\n", ct); + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); + return; + } + + write_lock_bh(&ip_ct_gre_lock); + if (ct->help.ct_pptp_info.keymap_orig) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_orig); + list_del(&ct->help.ct_pptp_info.keymap_orig->list); + kfree(ct->help.ct_pptp_info.keymap_orig); + ct->help.ct_pptp_info.keymap_orig = NULL; + } + if (ct->help.ct_pptp_info.keymap_reply) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_reply); + list_del(&ct->help.ct_pptp_info.keymap_reply->list); + kfree(ct->help.ct_pptp_info.keymap_reply); + ct->help.ct_pptp_info.keymap_reply = NULL; + } + write_unlock_bh(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + u_int32_t srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "ip_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct ip_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct ip_conntrack *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + const struct sk_buff *skb, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + } else + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + const struct sk_buff *skb) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { + .proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif +}; + +/* ip_conntrack_proto_gre initialization */ +int __init ip_ct_proto_gre_init(void) +{ + return ip_conntrack_protocol_register(&gre); +} + +void __exit ip_ct_proto_gre_fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + write_lock_bh(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + write_unlock_bh(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 000000000000..3cdd0684d30d --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -0,0 +1,401 @@ +/* + * ip_nat_pptp.c - Version 3.0 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig ) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig ) + * 2004-10-22 - Version 2.0 + * - kernel 2.6.x version + * 2005-06-10 - Version 3.0 + * - kernel >= 2.6.11 version, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IP_NAT_PPTP_VERSION "3.0" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + struct ip_conntrack *master = ct->master; + struct ip_conntrack_expect *other_exp; + struct ip_conntrack_tuple t; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&t); + other_exp = ip_conntrack_expect_find(&t); + if (other_exp) { + ip_conntrack_unexpect_related(other_exp); + ip_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + ip_nat_follow_master(ct, exp); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq->ocreq.callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq->icreq.callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq->clrreq.callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_callid), + (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static int +pptp_exp_gre(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply) +{ + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + + struct ip_conntrack *ct = expect_orig->master; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + /* alter expectation for PNS->PAC direction */ + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } + + /* alter expectation for PAC->PNS direction */ + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + + int ret = NF_ACCEPT, rv; + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq->ocack.peersCallID; + cid = &pptpReq->ocack.callID; + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq->iccon.peersCallID; + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq->wanerr.peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq->disc.callID; + break; + case PPTP_SET_LINK_INFO: + pcid = &pptpReq->setlink.peersCallID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + if (rv != NF_ACCEPT) + return rv; + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_cid), + (char *)&new_cid, + sizeof(new_cid)); + if (rv != NF_ACCEPT) + return rv; + } + + /* check for earlier return value of 'switch' above */ + if (ret != NF_ACCEPT) + return ret; + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +extern int __init ip_nat_proto_gre_init(void); +extern void __exit ip_nat_proto_gre_fini(void); + +static int __init init(void) +{ + int ret; + + DEBUGP("%s: registering NAT helper\n", __FILE__); + + ret = ip_nat_proto_gre_init(); + if (ret < 0) + return ret; + + BUG_ON(ip_nat_pptp_hook_outbound); + ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + + BUG_ON(ip_nat_pptp_hook_inbound); + ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + + BUG_ON(ip_nat_pptp_hook_exp_gre); + ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + + BUG_ON(ip_nat_pptp_hook_expectfn); + ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + + ip_nat_pptp_hook_expectfn = NULL; + ip_nat_pptp_hook_exp_gre = NULL; + ip_nat_pptp_hook_inbound = NULL; + ip_nat_pptp_hook_outbound = NULL; + + ip_nat_proto_gre_fini(); + /* Make sure noone calls it, meanwhile */ + synchronize_net(); + + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 000000000000..7c1285401672 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -0,0 +1,214 @@ +/* + * ip_nat_proto_gre.c - Version 2.0 + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t key; + u_int16_t *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl*4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... */ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + tuple->dst.u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(tuple->dst.u.gre.key)); + pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + break; + } + } + return 1; +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->src.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%u-0x%u ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat helper struct */ +static struct ip_nat_protocol gre = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, + .print = gre_print, + .print_range = gre_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif +}; + +int __init ip_nat_proto_gre_init(void) +{ + return ip_nat_protocol_register(&gre); +} + +void __exit ip_nat_proto_gre_fini(void) +{ + ip_nat_protocol_unregister(&gre); +} -- cgit v1.2.3 From e674d0f38de6109b59dbe30fba8b296a03229b8e Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Mon, 19 Sep 2005 15:34:40 -0700 Subject: [NETFILTER] ip6tables: remove duplicate code Some IPv6 matches have very similar loops to find IPv6 extension header and we can unify them. This patch introduces ipv6_find_hdr() to do it. I just checked that it can find the target headers in the packet which has dst,hbh,rt,frag,ah,esp headers. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++ net/ipv6/netfilter/ip6_tables.c | 52 ++++++++++++++++++ net/ipv6/netfilter/ip6t_ah.c | 81 ++-------------------------- net/ipv6/netfilter/ip6t_dst.c | 88 +++--------------------------- net/ipv6/netfilter/ip6t_esp.c | 73 ++----------------------- net/ipv6/netfilter/ip6t_frag.c | 90 ++++--------------------------- net/ipv6/netfilter/ip6t_hbh.c | 88 +++--------------------------- net/ipv6/netfilter/ip6t_rt.c | 83 +++------------------------- 8 files changed, 94 insertions(+), 464 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 58c72a52dc65..59f70b34e029 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -455,6 +455,9 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); +/* find specified header and get offset to it */ +extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + u8 target); #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787f..2da514b16d95 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1955,6 +1955,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bba..dde37793d20b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,92 +48,21 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_auth_hdr *ah = NULL, _ah; + struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; - /*DEBUGP("IPv6 AH entered\n");*/ - /* if (opt->auth == 0) return 0; - * It does not filled on output */ - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_ah header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* AH -> evaluate */ - if (nexthdr == NEXTHDR_AUTH) { - temp |= MASK_AH; - break; - } - - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_ah: new pointer too large! \n"); - break; - } - } - - /* AH header not found */ - if (temp != MASK_AH) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) return 0; - if (len < sizeof(struct ip_auth_hdr)){ + ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); + if (ah == NULL) { *hotdrop = 1; return 0; } - ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); - BUG_ON(ah == NULL); + hdrlen = (ah->hdrlen + 2) << 2; DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); DEBUGP("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a8..c450a635e54b 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8e..24bc0cde43a1 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -48,87 +48,22 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_esp_hdr _esp, *eh = NULL; + struct ip_esp_hdr _esp, *eh; const struct ip6t_esp *espinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; - - DEBUGP("ipv6_esp header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - temp |= MASK_ESP; - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_esp: new pointer too large! \n"); - break; - } - } - - /* ESP header not found */ - if (temp != MASK_ESP) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) return 0; - if (len < sizeof(struct ip_esp_hdr)) { + eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); + if (eh == NULL) { *hotdrop = 1; return 0; } - eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); - BUG_ON(eh == NULL); - DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); return (eh != NULL) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc80..085d5f8eea29 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,90 +48,18 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh = NULL; + struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; - unsigned int hdrlen = 0; - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_frag header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* FRAG -> evaluate */ - if (nexthdr == NEXTHDR_FRAGMENT) { - temp |= MASK_FRAGMENT; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_frag: new pointer too large! \n"); - break; - } - } - - /* FRAG header not found */ - if ( temp != MASK_FRAGMENT ) return 0; - - if (len < sizeof(struct frag_hdr)){ - *hotdrop = 1; - return 0; - } - fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - BUG_ON(fh == NULL); + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + return 0; + + fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); + if (fh == NULL){ + *hotdrop = 1; + return 0; + } DEBUGP("INFO %04X ", fh->frag_off); DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127e..1d09485111d0 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb670037df3..beb2fd5cebbb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,98 +50,29 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh = NULL; + struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; struct in6_addr *ap, _addr; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + return 0; - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_rt header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* ROUTING -> evaluate */ - if (nexthdr == NEXTHDR_ROUTING) { - temp |= MASK_ROUTING; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_rt: new pointer is too large! \n"); - break; - } - } - - /* ROUTING header not found */ - if ( temp != MASK_ROUTING ) return 0; - - if (len < (int)sizeof(struct ipv6_rt_hdr)){ + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen){ /* Pcket smaller than its length field */ return 0; } - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - BUG_ON(rh == NULL); - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); DEBUGP("TYPE %04X ", rh->type); DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); -- cgit v1.2.3 From a41bc00234a0a2ccaa99a194341ae108ae17ddc8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 19 Sep 2005 15:35:31 -0700 Subject: [NETFILTER]: Rename misnamed function Both __ip_conntrack_expect_find and ip_conntrack_expect_find_get take a reference to the expectation, the difference is that callers of __ip_conntrack_expect_find must hold ip_conntrack_lock. Signed-off-by: Patrick McHardy Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 2 +- net/ipv4/netfilter/ip_conntrack_netlink.c | 4 ++-- net/ipv4/netfilter/ip_conntrack_standalone.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 2df446c952ef..bace72a76cc4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -384,7 +384,7 @@ extern struct ip_conntrack_expect * __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index f8cd8e42961e..c1f82e0c81cf 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 15aef3564742..b08a432efcf8 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ae3e3e655db5..d3c7808010ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); -- cgit v1.2.3 From 8922bc93aa78a202c2b68b2de19b71329cb321e1 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 19 Sep 2005 15:35:57 -0700 Subject: [NETFILTER]: Export ip_nat_port_{nfattr_to_range,range_to_nfattr} Those exports are needed by the PPTP helper following in the next couple of changes. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_nat_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1adedb743f60..c3ea891d38e7 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) return ret; } +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif int __init ip_nat_init(void) -- cgit v1.2.3 From 3c3f8f25c177e4f9e4e00bcc1b90b28b1be37937 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 19 Sep 2005 15:41:28 -0700 Subject: [8021Q]: Add endian annotations. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 8 ++++---- net/8021q/vlan_dev.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 17d0c0d40b0e..eef0876d8307 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -42,8 +42,8 @@ struct hlist_node; struct vlan_ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ - unsigned short h_vlan_proto; /* Should always be 0x8100 */ - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_proto; /* Should always be 0x8100 */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; @@ -55,8 +55,8 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) } struct vlan_hdr { - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ - unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; #define VLAN_VID_MASK 0xfff diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96cf..b74864889670 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; - unsigned short proto; + __be16 proto; /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); -- cgit v1.2.3 From e14c3caf605dfd29bd1aac3097e39db94afc9f07 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2005 18:18:38 -0700 Subject: [TCP]: Handle SACK'd packets properly in tcp_fragment(). The problem is that we're now calling tcp_fragment() in a context where the packets might be marked as SACKED_ACKED or SACKED_RETRANS. This was not possible before as you never retransmitted packets that are so marked. Because of this, we need to adjust sacked_out and retrans_out in tcp_fragment(). This is exactly what the following patch does. We also need to preserve the SACKED_ACKED/SACKED_RETRANS marking if they exist. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b018e31b6530..5dd6dd7d091e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -461,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = - (TCP_SKB_CB(skb)->sacked & - (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { @@ -501,6 +499,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tcp_skb_pcount(buff); tp->packets_out -= diff; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= diff; tp->left_out -= diff; -- cgit v1.2.3 From 6d1cfe3f1752f17e297df60c8bcc6cd6e0a58449 Mon Sep 17 00:00:00 2001 From: Mark J Cox Date: Mon, 19 Sep 2005 17:55:30 -0700 Subject: [PATCH] raw_sendmsg DoS on 2.6 Fix unchecked __get_user that could be tricked into generating a memory read on an arbitrary address. The result of the read is not returned directly but you may be able to divine some information about it, or use the read to cause a crash on some architectures by reading hardware state. CAN-2004-2492. Fix from Al Viro, ack from Dave Miller. Signed-off-by: Linus Torvalds --- net/ipv4/raw.c | 2 +- net/ipv6/raw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 304bb0a1d4f0..4b0d7e4d6269 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5aa3691c578d..a1265a320b11 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; -- cgit v1.2.3 From 0fb375fb9b93b7d822debc6a734052337ccfdb1f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 21 Sep 2005 00:11:37 -0700 Subject: [AF_PACKET]: Allow for > 8 byte hardware addresses. The convention is that longer addresses will simply extend the hardeware address byte arrays at the end of sockaddr_ll and packet_mreq. In making this change a small information leak was also closed. The code only initializes the hardware address bytes that are used, but all of struct sockaddr_ll was copied to userspace. Now we just copy sockaddr_ll to the last byte of the hardware address used. For error checking larger structures than our internal maximums continue to be allowed but an error is signaled if we can not fit the hardware address into our internal structure. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/packet/af_packet.c | 65 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8690f171c1ef..ee865d88183b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -36,6 +36,11 @@ * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. + * Eric Biederman : Allow for > 8 byte hardware addresses. + * The convention is that longer addresses + * will simply extend the hardware address + * byte arrays at the end of sockaddr_ll + * and packet_mreq. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,7 +166,17 @@ struct packet_mclist int count; unsigned short type; unsigned short alen; - unsigned char addr[8]; + unsigned char addr[MAX_ADDR_LEN]; +}; +/* identical to struct packet_mreq except it has + * a longer address field. + */ +struct packet_mreq_max +{ + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[MAX_ADDR_LEN]; }; #endif #ifdef CONFIG_PACKET_MMAP @@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; + if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) + goto out; ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; @@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (dev->hard_header) { int res; err = -EINVAL; + if (saddr) { + if (saddr->sll_halen != dev->addr_len) + goto out_free; + if (saddr->sll_hatype != dev->type) + goto out_free; + } res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (sock->type != SOCK_DGRAM) { skb->tail = skb->data; @@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; + struct sockaddr_ll *sll; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1056,16 +1080,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, return -ENODEV; #endif - /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sizeof(struct sockaddr_ll); - /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it @@ -1086,6 +1100,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if(skb==NULL) goto out; + /* + * If the address length field is there to be filled in, we fill + * it in now. + */ + + sll = (struct sockaddr_ll*)skb->cb; + if (sock->type == SOCK_PACKET) + msg->msg_namelen = sizeof(struct sockaddr_pkt); + else + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + /* * You lose any data beyond the buffer you gave. If it worries a * user program they can ask the device for its MTU anyway. @@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } - *uaddr_len = sizeof(*sll); + *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; } @@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i } } -static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; @@ -1249,7 +1274,7 @@ done: return err; } -static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; @@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { - struct packet_mreq mreq; - if (optlen sizeof(mreq)) + len = sizeof(mreq); + if (copy_from_user(&mreq,optval,len)) return -EFAULT; + if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) + return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else -- cgit v1.2.3 From 78c6671a88313fd3c4364dc46e8c8186612616b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Sep 2005 00:15:39 -0700 Subject: [FIB_TRIE]: message cleanup Cleanup the printk's in fib_trie: * Convert a couple of places in the dump code to BUG_ON * Put log level's on each message The version message really needed the message since it leaks out on the pretty Fedora bootup. Signed-off-by: Stephen Hemminger Acked-by: Robert Olsson , Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90ae70870a10..50c0519cd70d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1086,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ @@ -1832,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi i++; continue; } - if (fa->fa_info->fib_nh == NULL) { - printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } - if (fa->fa_info == NULL) { - printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } + BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -1964,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id) trie_main = t; if (id == RT_TABLE_LOCAL) - printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; } -- cgit v1.2.3 From 7957aed72b104f7528da484f292442c6f3a131a1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Sep 2005 00:19:46 -0700 Subject: [TCP]: Set default congestion control correctly for incoming connections. Patch from Joel Sing to fix the default congestion control algorithm for incoming connections. If a new congestion control handler is added (via module), it should become the default for new connections. Instead, the incoming connections use reno. The cause is incorrect initialisation causes the tcp_init_congestion_control() function to return after the initial if test fails. Signed-off-by: Stephen Hemminger Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a88db28b0af7..b1a63b2c6b4a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); -- cgit v1.2.3 From 1d67e6501b8dba54ef8dcabebe2ad049b8ad0d67 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:27:56 -0300 Subject: [LLC]: Make llc_frame_alloc take a net_device as an argument So as to set the newly created sk_buff ->dev member with it, that way we stop using dev_base->next, that is the wrong thing to do, as there may well be several interfaces being used with LLC. This was not such a big problem after all as most of the users of llc_alloc_frame were setting the correct dev, but this way code is reduced. This also fixes another bug in llc_station_ac_send_null_dsap_xid_c, that was not setting the skb->dev field. Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 3 +- net/llc/llc_c_ac.c | 105 ++++++++++++++++++++------------------------------ net/llc/llc_s_ac.c | 6 +-- net/llc/llc_sap.c | 5 ++- net/llc/llc_station.c | 8 ++-- 5 files changed, 52 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 353baaa627f3..6dd4cdccca42 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -13,10 +13,11 @@ */ struct llc_sap; struct sk_buff; +struct net_device; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(void); +extern struct sk_buff *llc_alloc_frame(struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index b218be4c10ec..db98ea2113a5 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -217,13 +217,12 @@ int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock *sk, int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_disc_cmd(nskb, 1); @@ -243,14 +242,13 @@ free: int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit; - nskb->dev = llc->dev; llc_pdu_decode_pf_bit(skb, &f_bit); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); @@ -270,14 +268,13 @@ free: int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_dm_rsp(nskb, f_bit); @@ -306,11 +303,10 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -330,15 +326,14 @@ free: int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = (struct llc_pdu_sn *)&llc->rx_pdu_hdr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -360,15 +355,14 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct sk_buff *nskb; + struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -451,13 +445,12 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, u8 nr; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -487,13 +480,12 @@ int llc_conn_ac_resend_i_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rej_cmd(nskb, 1, llc->vR); @@ -512,14 +504,13 @@ free: int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 1; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); @@ -538,14 +529,13 @@ free: int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 0; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); @@ -564,13 +554,12 @@ free: int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rnr_cmd(nskb, 1, llc->vR); @@ -589,14 +578,13 @@ free: int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); @@ -615,14 +603,13 @@ free: int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); @@ -653,13 +640,12 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); @@ -678,13 +664,12 @@ free: int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR); @@ -703,14 +688,13 @@ free: int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); @@ -729,14 +713,13 @@ free: int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); @@ -755,13 +738,12 @@ free: int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -780,13 +762,12 @@ free: int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -815,8 +796,8 @@ void llc_conn_set_p_flag(struct sock *sk, u8 value) int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -824,7 +805,6 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) if (llc->dev->flags & IFF_LOOPBACK) dmac = llc->dev->dev_addr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_sabme_cmd(nskb, 1); @@ -845,11 +825,11 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { u8 f_bit; int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; nskb->dev = llc->dev; @@ -1001,13 +981,12 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, llc->ack_pf, llc->vR); diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index ed8ba7de6122..270b2f2030cd 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,10 +103,9 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0); @@ -149,10 +148,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 34228ef14985..0adaa289bf0a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -26,11 +26,12 @@ /** * llc_alloc_frame - allocates sk_buff for frame + * @dev: network device this skb will be sent over * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(void) +struct sk_buff *llc_alloc_frame(struct net_device *dev) { struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); @@ -38,7 +39,7 @@ struct sk_buff *llc_alloc_frame(void) skb_reserve(skb, 50); skb->nh.raw = skb->h.raw = skb->data; skb->protocol = htons(ETH_P_802_2); - skb->dev = dev_base->next; + skb->dev = dev; skb->mac.raw = skb->head; } return skb; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 8fe48a24bad5..85a7ac276141 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -249,7 +249,7 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; @@ -270,12 +270,11 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(); + struct sk_buff* nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); @@ -295,12 +294,11 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); -- cgit v1.2.3 From bdcc66cca89cc2e97e93054d139b2e9a2b2ec1a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:38:15 -0300 Subject: [LLC]: Simplify llc_c_ac code, removing unneeded assignments to variables Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_c_ac.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index db98ea2113a5..309c5682e58d 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -120,10 +120,8 @@ int llc_conn_ac_disc_ind(struct sock *sk, struct sk_buff *skb) reason = LLC_DISC_REASON_RX_DISC_CMD_PDU; } else if (ev->type == LLC_CONN_EV_TYPE_ACK_TMR) reason = LLC_DISC_REASON_ACK_TMR_EXP; - else { - reason = 0; + else rc = -EINVAL; - } if (!rc) { ev->reason = reason; ev->ind_prim = LLC_DISC_PRIM; @@ -160,9 +158,6 @@ int llc_conn_ac_rst_ind(struct sock *sk, struct sk_buff *skb) LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME) { reason = LLC_RESET_REASON_REMOTE; rc = 0; - } else { - reason = 0; - rc = 1; } break; case LLC_CONN_EV_TYPE_ACK_TMR: @@ -172,8 +167,7 @@ int llc_conn_ac_rst_ind(struct sock *sk, struct sk_buff *skb) if (llc->retry_count > llc->n2) { reason = LLC_RESET_REASON_LOCAL; rc = 0; - } else - rc = 1; + } break; } if (!rc) { -- cgit v1.2.3 From 838a75dae05d59b7d7894c299508ae3d7dea9630 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:44:23 -0300 Subject: [LLC]: Remove unneeded f_bit variables Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_c_ac.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 309c5682e58d..9abaecceea68 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -267,11 +267,10 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 f_bit = 1; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_dm_rsp(nskb, f_bit); + llc_pdu_init_as_dm_rsp(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; @@ -324,13 +323,12 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - u8 f_bit = 0; struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = (struct llc_pdu_sn *)&llc->rx_pdu_hdr; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, + llc_pdu_init_as_frmr_rsp(nskb, pdu, 0, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) @@ -502,12 +500,11 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - u8 f_bit = 1; struct llc_sap *sap = llc->sap; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rej_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; @@ -528,11 +525,10 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 f_bit = 0; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rej_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; @@ -577,11 +573,10 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 f_bit = 1; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rnr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; @@ -601,12 +596,11 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - u8 f_bit = 0; struct llc_sap *sap = llc->sap; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; @@ -712,11 +706,10 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 f_bit = 1; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); if (rc) goto free; -- cgit v1.2.3 From e0dd55190ff8c6485a721c9473ded92d03c0fe01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:50:15 -0300 Subject: [LLC]: introduce llc_conn_tmr_common_cb, to avoid code duplication Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_c_ac.c | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 9abaecceea68..50c827e13a9f 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1326,7 +1326,7 @@ int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) return 0; } -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) +static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) { struct sock *sk = (struct sock *)timeout_data; struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); @@ -1336,58 +1336,30 @@ void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) struct llc_conn_state_ev *ev = llc_conn_ev(skb); skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_P_TMR; + ev->type = type; llc_process_tmr_ev(sk, skb); } bh_unlock_sock(sk); } -void llc_conn_busy_tmr_cb(unsigned long timeout_data) +void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) { - struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); +} - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_BUSY_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); +void llc_conn_busy_tmr_cb(unsigned long timeout_data) +{ + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); } void llc_conn_ack_tmr_cb(unsigned long timeout_data) { - struct sock* sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_ACK_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); } void llc_conn_rej_tmr_cb(unsigned long timeout_data) { - struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_REJ_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); } int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 774ccb4f64020dad40d38efa63685220e1f245cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:53:35 -0300 Subject: [LLC]: Remove unneeded temp net_device variables Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 66f55e514b56..2975d88eeb0a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -404,7 +404,6 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; - struct net_device *dev; int rc = -EINVAL; lock_sock(sk); @@ -422,7 +421,6 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, llc->daddr.lsap = addr->sllc_sap; memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); } - dev = llc->dev; if (sk->sk_type != SOCK_STREAM) goto out; rc = -EALREADY; @@ -431,7 +429,7 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); - rc = llc_establish_connection(sk, dev->dev_addr, + rc = llc_establish_connection(sk, llc->dev->dev_addr, addr->sllc_mac, addr->sllc_sap); if (rc) { dprintk("%s: llc_ui_send_conn failed :-(\n", __FUNCTION__); @@ -740,7 +738,6 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_llc *addr = (struct sockaddr_llc *)msg->msg_name; int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; - struct net_device *dev; struct sk_buff *skb; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; @@ -763,11 +760,10 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, if (rc) goto release; } - dev = llc->dev; - hdrlen = dev->hard_header_len + llc_ui_header_len(sk, addr); + hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > dev->mtu) - size = dev->mtu; + if (size > llc->dev->mtu) + size = llc->dev->mtu; copied = size - hdrlen; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); @@ -775,7 +771,7 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto release; skb->sk = sk; - skb->dev = dev; + skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); rc = memcpy_fromiovec(skb_put(skb, copied), msg->msg_iov, copied); -- cgit v1.2.3 From 5a770c0262262e96979fe05d5c2fa1d1f409dbdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:56:26 -0300 Subject: [LLC]: Update comments for llc_ui_bind and llc_ui_autobind to match new behaviour Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2975d88eeb0a..81a53791ed9c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -231,20 +231,13 @@ out: } /** - * llc_ui_autobind - Bind a socket to a specific address. - * @sk: Socket to bind an address to. - * @addr: Address the user wants the socket bound to. + * llc_ui_autobind - automatically bind a socket to a sap + * @sock: socket to bind + * @addr: address to connect to + * + * Used by llc_ui_connect and llc_ui_sendmsg when the user hasn't + * specifically used llc_ui_bind to bind to an specific address/sap * - * Bind a socket to a specific address. For llc a user is able to bind to - * a specific sap only or mac + sap. If the user only specifies a sap and - * a null dmac (all zeros) the user is attempting to bind to an entire - * sap. This will stop anyone else on the local system from using that - * sap. If someone else has a mac + sap open the bind to null + sap will - * fail. - * If the user desires to bind to a specific mac + sap, it is possible to - * have multiple sap connections via multiple macs. - * Bind and autobind for that matter must enforce the correct sap usage - * otherwise all hell will break loose. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) @@ -285,11 +278,7 @@ out: * @addrlen: Length of the uaddr structure. * * Bind a socket to a specific address. For llc a user is able to bind to - * a specific sap only or mac + sap. If the user only specifies a sap and - * a null dmac (all zeros) the user is attempting to bind to an entire - * sap. This will stop anyone else on the local system from using that - * sap. If someone else has a mac + sap open the bind to null + sap will - * fail. + * a specific sap only or mac + sap. * If the user desires to bind to a specific mac + sap, it is possible to * have multiple sap connections via multiple macs. * Bind and autobind for that matter must enforce the correct sap usage -- cgit v1.2.3 From 0eb8017242cb7e8b18af4751b03646436b8f90e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:57:55 -0300 Subject: [LLC]: Mark llc_find_next_offset as __init, saving some more bytes Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_conn.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 4c644bc70eae..1f659e56d8e2 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -544,14 +544,14 @@ u8 llc_data_accept_state(u8 state) } /** - * find_next_offset - finds offset for next category of transitions + * llc_find_next_offset - finds offset for next category of transitions * @state: state table. * @offset: start offset. * * Finds offset of next category of transitions in transition table. * Returns the start index of next category. */ -static u16 find_next_offset(struct llc_conn_state *state, u16 offset) +static u16 __init llc_find_next_offset(struct llc_conn_state *state, u16 offset) { u16 cnt = 0; struct llc_conn_state_trans **next_trans; @@ -578,8 +578,8 @@ void __init llc_build_offset_table(void) next_offset = 0; for (ev_type = 0; ev_type < NBR_CONN_EV; ev_type++) { llc_offset_table[state][ev_type] = next_offset; - next_offset += find_next_offset(curr_state, - next_offset) + 1; + next_offset += llc_find_next_offset(curr_state, + next_offset) + 1; } } } -- cgit v1.2.3 From af426d327c38bcb8cbb87c60134d42d2e93b20cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:59:22 -0300 Subject: [LLC]: Help the compiler with likely/unlikely, saving some more bytes Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 37 +++++++++++++++++++------------------ net/llc/llc_conn.c | 12 ++++++------ net/llc/llc_input.c | 2 +- 3 files changed, 26 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 81a53791ed9c..aed61e6376ed 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -155,7 +155,7 @@ static int llc_ui_create(struct socket *sock, int protocol) struct sock *sk; int rc = -ESOCKTNOSUPPORT; - if (sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM) { + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { @@ -177,7 +177,7 @@ static int llc_ui_release(struct socket *sock) struct sock *sk = sock->sk; struct llc_sock *llc; - if (!sk) + if (unlikely(sk == NULL)) goto out; sock_hold(sk); lock_sock(sk); @@ -294,10 +294,10 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __FUNCTION__, addr->sllc_sap); - if (!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)) + if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (addr->sllc_family != AF_LLC) + if (unlikely(addr->sllc_family != AF_LLC)) goto out; if (!addr->sllc_sap) { rc = -EUSERS; @@ -358,7 +358,7 @@ static int llc_ui_shutdown(struct socket *sock, int how) int rc = -ENOTCONN; lock_sock(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) goto out; rc = -EINVAL; if (how != 2) @@ -396,10 +396,15 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, int rc = -EINVAL; lock_sock(sk); - if (addrlen != sizeof(*addr)) + if (unlikely(addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (addr->sllc_family != AF_LLC) + if (unlikely(addr->sllc_family != AF_LLC)) + goto out; + if (unlikely(sk->sk_type != SOCK_STREAM)) + goto out; + rc = -EALREADY; + if (unlikely(sock->state == SS_CONNECTING)) goto out; /* bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { @@ -410,11 +415,6 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, llc->daddr.lsap = addr->sllc_sap; memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); } - if (sk->sk_type != SOCK_STREAM) - goto out; - rc = -EALREADY; - if (sock->state == SS_CONNECTING) - goto out; sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); @@ -448,10 +448,10 @@ static int llc_ui_listen(struct socket *sock, int backlog) int rc = -EINVAL; lock_sock(sk); - if (sock->state != SS_UNCONNECTED) + if (unlikely(sock->state != SS_UNCONNECTED)) goto out; rc = -EOPNOTSUPP; - if (sk->sk_type != SOCK_STREAM) + if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EAGAIN; if (sock_flag(sk, SOCK_ZAPPED)) @@ -614,10 +614,11 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) dprintk("%s: accepting on %02X\n", __FUNCTION__, llc_sk(sk)->laddr.lsap); lock_sock(sk); - if (sk->sk_type != SOCK_STREAM) + if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EINVAL; - if (sock->state != SS_UNCONNECTED || sk->sk_state != TCP_LISTEN) + if (unlikely(sock->state != SS_UNCONNECTED || + sk->sk_state != TCP_LISTEN)) goto out; /* wait for a connection to arrive. */ rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo); @@ -880,7 +881,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, int rc = -EINVAL, opt; lock_sock(sk); - if (level != SOL_LLC || optlen != sizeof(int)) + if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; rc = get_user(opt, (int __user *)optval); if (rc) @@ -955,7 +956,7 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, int val = 0, len = 0, rc = -EINVAL; lock_sock(sk); - if (level != SOL_LLC) + if (unlikely(level != SOL_LLC)) goto out; rc = get_user(len, optlen); if (rc) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 1f659e56d8e2..ce7b893ed1ab 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -64,12 +64,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; rc = llc_conn_service(sk, skb); /* sending event to state machine */ - if (rc) { + if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __FUNCTION__); goto out_kfree_skb; } - if (!ev->ind_prim && !ev->cfm_prim) { + if (unlikely(!ev->ind_prim && !ev->cfm_prim)) { /* indicate or confirm not required */ /* XXX this is not very pretty, perhaps we should store * XXX indicate/confirm-needed state in the llc_conn_state_ev @@ -80,7 +80,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) goto out_skb_put; } - if (ev->ind_prim && ev->cfm_prim) /* Paranoia */ + if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */ skb_get(skb); switch (ev->ind_prim) { @@ -762,14 +762,14 @@ static int llc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int rc = 0; struct llc_sock *llc = llc_sk(sk); - if (llc_backlog_type(skb) == LLC_PACKET) { - if (llc->state > 1) /* not closed */ + if (likely(llc_backlog_type(skb) == LLC_PACKET)) { + if (likely(llc->state > 1)) /* not closed */ rc = llc_conn_rcv(sk, skb); else goto out_kfree_skb; } else if (llc_backlog_type(skb) == LLC_EVENT) { /* timer expiration event */ - if (llc->state > 1) /* not closed */ + if (likely(llc->state > 1)) /* not closed */ rc = llc_conn_state_process(sk, skb); else goto out_kfree_skb; diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 13b46240b7a1..60c1acac7c97 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -101,7 +101,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) u8 llc_len = 2; struct llc_pdu_sn *pdu; - if (!pskb_may_pull(skb, sizeof(*pdu))) + if (unlikely(!pskb_may_pull(skb, sizeof(*pdu)))) return 0; pdu = (struct llc_pdu_sn *)skb->data; -- cgit v1.2.3 From b9441fc3375a6637a81bc1635c5e12da4dc7acc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:09:45 -0300 Subject: [LLC]: Use const in llc_c_ev.c Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_c_ev.c | 134 +++++++++++++++++++++++++++-------------------------- 1 file changed, 68 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index d5bdb53a348f..c53b82324268 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -99,7 +99,7 @@ out: int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_CONN_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -107,7 +107,7 @@ int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_DATA_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -115,7 +115,7 @@ int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_DISC_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -123,7 +123,7 @@ int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_RESET_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -131,7 +131,7 @@ int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_DETECTED ? 0 : 1; @@ -139,7 +139,7 @@ int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_local_busy_cleared(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_CLEARED ? 0 : 1; @@ -152,7 +152,7 @@ int llc_conn_ev_rx_bad_pdu(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_disc_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_DISC ? 0 : 1; @@ -160,7 +160,7 @@ int llc_conn_ev_rx_disc_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_dm_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_DM ? 0 : 1; @@ -168,7 +168,7 @@ int llc_conn_ev_rx_dm_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_frmr_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_FRMR ? 0 : 1; @@ -176,7 +176,7 @@ int llc_conn_ev_rx_frmr_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -186,7 +186,7 @@ int llc_conn_ev_rx_i_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -197,9 +197,9 @@ int llc_conn_ev_rx_i_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_0(pdu) && ns != vr && @@ -209,9 +209,9 @@ int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && ns != vr && @@ -221,10 +221,11 @@ int llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn * pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); - u16 rc = LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && + const struct llc_pdu_sn * pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); + const u16 rc = LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && + ns != vr && llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; if (!rc) dprintk("%s: matched, state=%d, ns=%d, vr=%d\n", @@ -234,7 +235,7 @@ int llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -244,7 +245,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && @@ -253,7 +254,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -263,9 +264,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_0(pdu) && ns != vr && @@ -275,9 +276,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && ns != vr && @@ -287,9 +288,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; @@ -298,10 +299,11 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); - u16 rc = LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); + const u16 rc = LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && + ns != vr && llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; if (!rc) dprintk("%s: matched, state=%d, ns=%d, vr=%d\n", @@ -311,7 +313,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns(struct sock *sk, int llc_conn_ev_rx_rej_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -320,7 +322,7 @@ int llc_conn_ev_rx_rej_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -329,7 +331,7 @@ int llc_conn_ev_rx_rej_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -338,7 +340,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -347,7 +349,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_REJ ? 0 : 1; @@ -355,7 +357,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -364,7 +366,7 @@ int llc_conn_ev_rx_rnr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -373,7 +375,7 @@ int llc_conn_ev_rx_rnr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -382,7 +384,7 @@ int llc_conn_ev_rx_rnr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -391,7 +393,7 @@ int llc_conn_ev_rx_rnr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -400,7 +402,7 @@ int llc_conn_ev_rx_rr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -409,7 +411,7 @@ int llc_conn_ev_rx_rr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && @@ -419,7 +421,7 @@ int llc_conn_ev_rx_rr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && @@ -429,7 +431,7 @@ int llc_conn_ev_rx_rr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_sabme_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME ? 0 : 1; @@ -446,7 +448,7 @@ int llc_conn_ev_rx_ua_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); if (LLC_PDU_IS_CMD(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { @@ -461,7 +463,7 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); if (LLC_PDU_IS_CMD(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) @@ -480,7 +482,7 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); if (LLC_PDU_IS_RSP(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { @@ -502,7 +504,7 @@ int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); if (LLC_PDU_IS_RSP(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) @@ -524,9 +526,9 @@ int llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vs = llc_sk(sk)->vS; - u8 nr = LLC_I_GET_NR(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vs = llc_sk(sk)->vS; + const u8 nr = LLC_I_GET_NR(pdu); if (LLC_PDU_IS_CMD(pdu) && (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) && @@ -542,9 +544,9 @@ int llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vs = llc_sk(sk)->vS; - u8 nr = LLC_I_GET_NR(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vs = llc_sk(sk)->vS; + const u8 nr = LLC_I_GET_NR(pdu); if (LLC_PDU_IS_RSP(pdu) && (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) && @@ -563,28 +565,28 @@ int llc_conn_ev_rx_any_frame(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_P_TMR; } int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_ACK_TMR; } int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_REJ_TMR; } int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_BUSY_TMR; } @@ -596,7 +598,7 @@ int llc_conn_ev_init_p_f_cycle(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_tx_buffer_full(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_TX_BUFF_FULL ? 0 : 1; -- cgit v1.2.3 From 72b1ad4a7e239a2224e87194654acba56fc477d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:19:52 -0300 Subject: [LLC]: Remove unused functions from llc_c_ev.c Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_c_ev.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'net') diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index c53b82324268..c5deda246614 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -46,8 +47,6 @@ #define dprintk(args...) #endif -extern u16 llc_circular_between(u8 a, u8 b, u8 c); - /** * llc_util_ns_inside_rx_window - check if sequence number is in rx window * @ns: sequence number of received pdu. @@ -479,28 +478,6 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) -{ - u16 rc = 1; - const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - - if (LLC_PDU_IS_RSP(pdu)) { - if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { - if (LLC_I_PF_IS_1(pdu)) - rc = 0; - } else if (LLC_PDU_TYPE_IS_U(pdu)) - switch (LLC_U_PDU_RSP(pdu)) { - case LLC_2_PDU_RSP_UA: - case LLC_2_PDU_RSP_DM: - case LLC_2_PDU_RSP_FRMR: - if (LLC_U_PF_IS_1(pdu)) - rc = 0; - break; - } - } - return rc; -} - int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; -- cgit v1.2.3 From b35bd11019ed1084a36632f1c1d936244d9cfb5b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:22:39 -0300 Subject: [LLC]: Convert llc_ui_wait_for_ functions to use prepare_to_wait/finish_wait And make it look more like the similar routines in the TCP/IP source code. Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 147 +++++++++++++++++++++++++------------------------------ 1 file changed, 67 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index aed61e6376ed..436c8db67f55 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -426,12 +426,30 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_CLOSE; goto out; } - rc = llc_ui_wait_for_conn(sk, sk->sk_rcvtimeo); - if (rc) - dprintk("%s: llc_ui_wait_for_conn failed=%d\n", __FUNCTION__, rc); + + if (sk->sk_state == TCP_SYN_SENT) { + const int timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + + if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) + goto out; + + rc = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out; + } + + if (sk->sk_state == TCP_CLOSE) + goto sock_error; + + sock->state = SS_CONNECTED; + rc = 0; out: release_sock(sk); return rc; +sock_error: + rc = sock_error(sk) ? : -ECONNABORTED; + sock->state = SS_UNCONNECTED; + goto out; } /** @@ -472,117 +490,88 @@ out: static int llc_ui_wait_for_disc(struct sock *sk, int timeout) { - DECLARE_WAITQUEUE(wait, current); - int rc; + DEFINE_WAIT(wait); + int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - rc = 0; - if (sk->sk_state != TCP_CLOSE) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else - break; + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + while (sk->sk_state != TCP_CLOSE) { + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; + rc = 0; + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + finish_wait(sk->sk_sleep, &wait); return rc; } static int llc_ui_wait_for_conn(struct sock *sk, int timeout) { - DECLARE_WAITQUEUE(wait, current); - int rc; + DEFINE_WAIT(wait); - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - rc = -EAGAIN; - if (sk->sk_state == TCP_CLOSE) - break; - rc = 0; - if (sk->sk_state != TCP_ESTABLISHED) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else - break; - rc = -ERESTARTSYS; - if (signal_pending(current)) - break; - rc = -EAGAIN; - if (!timeout) + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state == TCP_SYN_SENT) { + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current) || !timeout) break; + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return rc; + finish_wait(sk->sk_sleep, &wait); + return timeout; } static int llc_ui_wait_for_data(struct sock *sk, int timeout) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) break; - /* - * Well, if we have backlog, try to process it now. - */ - if (sk->sk_backlog.tail) { - release_sock(sk); - lock_sock(sk); - } - rc = 0; - if (skb_queue_empty(&sk->sk_receive_queue)) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + if (!skb_queue_empty(&sk->sk_receive_queue)) break; + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; + rc = 0; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + finish_wait(sk->sk_sleep, &wait); return rc; } static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT(wait); struct llc_sock *llc = llc_sk(sk); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); for (;;) { - dprintk("%s: looping...\n", __FUNCTION__); - __set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); rc = -ENOTCONN; if (sk->sk_shutdown & RCV_SHUTDOWN) break; rc = 0; - if (llc_data_accept_state(llc->state) || llc->p_flag) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + if (!llc_data_accept_state(llc->state) && !llc->p_flag) break; + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); rc = -ERESTARTSYS; if (signal_pending(current)) break; @@ -590,8 +579,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout) if (!timeout) break; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + finish_wait(sk->sk_sleep, &wait); return rc; } @@ -621,9 +609,11 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) sk->sk_state != TCP_LISTEN)) goto out; /* wait for a connection to arrive. */ - rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo); - if (rc) - goto out; + if (skb_queue_empty(&sk->sk_receive_queue)) { + rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo); + if (rc) + goto out; + } dprintk("%s: got a new connection on %02X\n", __FUNCTION__, llc_sk(sk)->laddr.lsap); skb = skb_dequeue(&sk->sk_receive_queue); @@ -672,19 +662,16 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; struct sk_buff *skb; size_t copied = 0; - int rc = -ENOMEM, timeout; + int rc = -ENOMEM; int noblock = flags & MSG_DONTWAIT; dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__, llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); - timeout = sock_rcvtimeo(sk, noblock); - rc = llc_ui_wait_for_data(sk, timeout); - if (rc) { - dprintk("%s: llc_ui_wait_for_data failed recv " - "in %02X from %02X\n", __FUNCTION__, - llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); - goto out; + if (skb_queue_empty(&sk->sk_receive_queue)) { + rc = llc_ui_wait_for_data(sk, sock_rcvtimeo(sk, noblock)); + if (rc) + goto out; } skb = skb_dequeue(&sk->sk_receive_queue); if (!skb) /* shutdown */ -- cgit v1.2.3 From 54fb7f25f19a4539d3ec012e410439913650dc06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:26:14 -0300 Subject: [LLC]: Use the sk_wait_event primitive Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 65 +++++++++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 436c8db67f55..95444f227510 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -37,10 +37,10 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static struct proto_ops llc_ui_ops; -static int llc_ui_wait_for_conn(struct sock *sk, int timeout); -static int llc_ui_wait_for_disc(struct sock *sk, int timeout); -static int llc_ui_wait_for_data(struct sock *sk, int timeout); -static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout); +static int llc_ui_wait_for_conn(struct sock *sk, long timeout); +static int llc_ui_wait_for_disc(struct sock *sk, long timeout); +static int llc_ui_wait_for_data(struct sock *sk, long timeout); +static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 #define dprintk(args...) printk(KERN_DEBUG args) @@ -117,7 +117,7 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) int rc = 0; if (llc_data_accept_state(llc->state) || llc->p_flag) { - int timeout = sock_sndtimeo(sk, noblock); + long timeout = sock_sndtimeo(sk, noblock); rc = llc_ui_wait_for_busy_core(sk, timeout); } @@ -428,7 +428,7 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, } if (sk->sk_state == TCP_SYN_SENT) { - const int timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) goto out; @@ -488,16 +488,15 @@ out: return rc; } -static int llc_ui_wait_for_disc(struct sock *sk, int timeout) +static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { DEFINE_WAIT(wait); int rc = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != TCP_CLOSE) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) + break; rc = -ERESTARTSYS; if (signal_pending(current)) break; @@ -505,44 +504,37 @@ static int llc_ui_wait_for_disc(struct sock *sk, int timeout) if (!timeout) break; rc = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); return rc; } -static int llc_ui_wait_for_conn(struct sock *sk, int timeout) +static int llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - - while (sk->sk_state == TCP_SYN_SENT) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) + break; if (signal_pending(current) || !timeout) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); } finish_wait(sk->sk_sleep, &wait); return timeout; } -static int llc_ui_wait_for_data(struct sock *sk, int timeout) +static int llc_ui_wait_for_data(struct sock *sk, long timeout) { DEFINE_WAIT(wait); int rc = 0; - for (;;) { + while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (sk_wait_event(sk, &timeout, + (sk->sk_shutdown & RCV_SHUTDOWN) || + (!skb_queue_empty(&sk->sk_receive_queue)))) break; - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); rc = -ERESTARTSYS; if (signal_pending(current)) break; @@ -555,23 +547,20 @@ static int llc_ui_wait_for_data(struct sock *sk, int timeout) return rc; } -static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout) +static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { DEFINE_WAIT(wait); struct llc_sock *llc = llc_sk(sk); int rc; - for (;;) { + while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - rc = -ENOTCONN; - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; rc = 0; - if (!llc_data_accept_state(llc->state) && !llc->p_flag) + if (sk_wait_event(sk, &timeout, + (sk->sk_shutdown & RCV_SHUTDOWN) || + (!llc_data_accept_state(llc->state) && + !llc->p_flag))) break; - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); rc = -ERESTARTSYS; if (signal_pending(current)) break; -- cgit v1.2.3 From 590232a7150674b2036291eaefce085f3f9659c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:30:44 -0300 Subject: [LLC]: Add sysctl support for the LLC timeouts Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/sysctl.h | 26 ++++++++- include/net/llc.h | 7 +++ include/net/llc_conn.h | 10 ++-- net/llc/Makefile | 1 + net/llc/af_llc.c | 47 +++++++++++----- net/llc/llc_c_ac.c | 12 ++--- net/llc/llc_conn.c | 13 +++-- net/llc/llc_station.c | 11 ++-- net/llc/sysctl_net_llc.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 232 insertions(+), 31 deletions(-) create mode 100644 net/llc/sysctl_net_llc.c (limited to 'net') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3a29a9f9b451..fc8e367f671e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -202,7 +202,8 @@ enum NET_TR=14, NET_DECNET=15, NET_ECONET=16, - NET_SCTP=17, + NET_SCTP=17, + NET_LLC=18, }; /* /proc/sys/kernel/random */ @@ -522,6 +523,29 @@ enum { NET_IPX_FORWARDING=2 }; +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; /* /proc/sys/net/appletalk */ enum { diff --git a/include/net/llc.h b/include/net/llc.h index 71769a5aeef3..8b8e2be289b1 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -98,4 +98,11 @@ extern void llc_proc_exit(void); #define llc_proc_init() (0) #define llc_proc_exit() do { } while(0) #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL +extern int llc_sysctl_init(void); +extern void llc_sysctl_exit(void); +#else +#define llc_sysctl_init() (0) +#define llc_sysctl_exit() do { } while(0) +#endif /* CONFIG_SYSCTL */ #endif /* LLC_H */ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 8ad3bc2c23d7..8a8ff4810135 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -19,14 +19,14 @@ #define LLC_EVENT 1 #define LLC_PACKET 2 -#define LLC_P_TIME 2 -#define LLC_ACK_TIME 1 -#define LLC_REJ_TIME 3 -#define LLC_BUSY_TIME 3 +#define LLC2_P_TIME 2 +#define LLC2_ACK_TIME 1 +#define LLC2_REJ_TIME 3 +#define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; - u16 expire; /* timer expire time */ + unsigned long expire; /* timer expire time */ }; struct llc_sock { diff --git a/net/llc/Makefile b/net/llc/Makefile index 5ebd4ed2bd42..4e260cff3c5d 100644 --- a/net/llc/Makefile +++ b/net/llc/Makefile @@ -22,3 +22,4 @@ llc2-y := llc_if.o llc_c_ev.o llc_c_ac.o llc_conn.o llc_c_st.o llc_pdu.o \ llc_sap.o llc_s_ac.o llc_s_ev.o llc_s_st.o af_llc.o llc_station.o llc2-$(CONFIG_PROC_FS) += llc_proc.o +llc2-$(CONFIG_SYSCTL) += sysctl_net_llc.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 95444f227510..ef125345a2db 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -877,22 +877,22 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; - llc->ack_timer.expire = opt; + llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; - llc->pf_cycle_timer.expire = opt; + llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; - llc->rej_sent_timer.expire = opt; + llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; - llc->busy_state_timer.expire = opt; + llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) @@ -942,17 +942,17 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, goto out; switch (optname) { case LLC_OPT_RETRY: - val = llc->n2; break; + val = llc->n2; break; case LLC_OPT_SIZE: - val = llc->n1; break; + val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: - val = llc->ack_timer.expire; break; + val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: - val = llc->pf_cycle_timer.expire; break; + val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: - val = llc->rej_sent_timer.expire; break; + val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: - val = llc->busy_state_timer.expire; break; + val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: @@ -999,6 +999,13 @@ static struct proto_ops llc_ui_ops = { extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); +static char llc_proc_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; +static char llc_sysctl_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the sysctl entries\n"; +static char llc_sock_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the network family\n"; + static int __init llc2_init(void) { int rc = proto_register(&llc_proto, 0); @@ -1010,13 +1017,28 @@ static int __init llc2_init(void) llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); - if (rc != 0) + if (rc != 0) { + printk(llc_proc_err_msg); goto out_unregister_llc_proto; - sock_register(&llc_ui_family_ops); + } + rc = llc_sysctl_init(); + if (rc) { + printk(llc_sysctl_err_msg); + goto out_proc; + } + rc = sock_register(&llc_ui_family_ops); + if (rc) { + printk(llc_sock_err_msg); + goto out_sysctl; + } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; +out_sysctl: + llc_sysctl_exit(); +out_proc: + llc_proc_exit(); out_unregister_llc_proto: proto_unregister(&llc_proto); goto out; @@ -1029,6 +1051,7 @@ static void __exit llc2_exit(void) llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); + llc_sysctl_exit(); proto_unregister(&llc_proto); } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 50c827e13a9f..a4daa91003dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -620,7 +620,7 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) if (!llc->remote_busy_flag) { llc->remote_busy_flag = 1; mod_timer(&llc->busy_state_timer.timer, - jiffies + llc->busy_state_timer.expire * HZ); + jiffies + llc->busy_state_timer.expire); } return 0; } @@ -853,7 +853,7 @@ int llc_conn_ac_start_p_timer(struct sock *sk, struct sk_buff *skb) llc_conn_set_p_flag(sk, 1); mod_timer(&llc->pf_cycle_timer.timer, - jiffies + llc->pf_cycle_timer.expire * HZ); + jiffies + llc->pf_cycle_timer.expire); return 0; } @@ -1131,7 +1131,7 @@ int llc_conn_ac_start_ack_timer(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire * HZ); + mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire); return 0; } @@ -1140,7 +1140,7 @@ int llc_conn_ac_start_rej_timer(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); mod_timer(&llc->rej_sent_timer.timer, - jiffies + llc->rej_sent_timer.expire * HZ); + jiffies + llc->rej_sent_timer.expire); return 0; } @@ -1151,7 +1151,7 @@ int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk, if (!timer_pending(&llc->ack_timer.timer)) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); return 0; } @@ -1199,7 +1199,7 @@ int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb) } if (unacked) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); } else if (llc->failed_data_req) { u8 f_bit; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ce7b893ed1ab..d3783f8ee481 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -40,6 +40,11 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; +int sysctl_llc2_ack_timeout = LLC2_ACK_TIME * HZ; +int sysctl_llc2_p_timeout = LLC2_P_TIME * HZ; +int sysctl_llc2_rej_timeout = LLC2_REJ_TIME * HZ; +int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; + /** * llc_conn_state_process - sends event to connection state machine * @sk: connection @@ -799,22 +804,22 @@ static void llc_sk_init(struct sock* sk) llc->dec_step = llc->connect_step = 1; init_timer(&llc->ack_timer.timer); - llc->ack_timer.expire = LLC_ACK_TIME; + llc->ack_timer.expire = sysctl_llc2_ack_timeout; llc->ack_timer.timer.data = (unsigned long)sk; llc->ack_timer.timer.function = llc_conn_ack_tmr_cb; init_timer(&llc->pf_cycle_timer.timer); - llc->pf_cycle_timer.expire = LLC_P_TIME; + llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; llc->pf_cycle_timer.timer.data = (unsigned long)sk; llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb; init_timer(&llc->rej_sent_timer.timer); - llc->rej_sent_timer.expire = LLC_REJ_TIME; + llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; llc->rej_sent_timer.timer.data = (unsigned long)sk; llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb; init_timer(&llc->busy_state_timer.timer); - llc->busy_state_timer.expire = LLC_BUSY_TIME; + llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->busy_state_timer.timer.data = (unsigned long)sk; llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 85a7ac276141..2d764b0382ce 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -50,6 +50,10 @@ struct llc_station { struct sk_buff_head mac_pdu_q; }; +#define LLC_STATION_ACK_TIME (3 * HZ) + +int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; + /* Types of events (possible values in 'ev->type') */ #define LLC_STATION_EV_TYPE_SIMPLE 1 #define LLC_STATION_EV_TYPE_CONDITION 2 @@ -218,7 +222,8 @@ static void llc_station_send_pdu(struct sk_buff *skb) static int llc_station_ac_start_ack_timer(struct sk_buff *skb) { - mod_timer(&llc_main_station.ack_timer, jiffies + LLC_ACK_TIME * HZ); + mod_timer(&llc_main_station.ack_timer, + jiffies + sysctl_llc_station_ack_timeout); return 0; } @@ -687,7 +692,8 @@ int __init llc_station_init(void) init_timer(&llc_main_station.ack_timer); llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; llc_main_station.ack_timer.function = llc_station_ack_tmr_cb; - + llc_main_station.ack_timer.expires = jiffies + + sysctl_llc_station_ack_timeout; skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; @@ -695,7 +701,6 @@ int __init llc_station_init(void) llc_set_station_handler(llc_station_rcv); ev = llc_station_ev(skb); memset(ev, 0, sizeof(*ev)); - llc_main_station.ack_timer.expires = jiffies + 3 * HZ; llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_DOWN; ev->type = LLC_STATION_EV_TYPE_SIMPLE; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c new file mode 100644 index 000000000000..4d99d2f27a21 --- /dev/null +++ b/net/llc/sysctl_net_llc.c @@ -0,0 +1,136 @@ +/* + * sysctl_net_llc.c: sysctl interface to LLC net subsystem. + * + * Arnaldo Carvalho de Melo + */ + +#include +#include +#include +#include + +#ifndef CONFIG_SYSCTL +#error This file should not be compiled without CONFIG_SYSCTL defined +#endif + +extern int sysctl_llc2_ack_timeout; +extern int sysctl_llc2_busy_timeout; +extern int sysctl_llc2_p_timeout; +extern int sysctl_llc2_rej_timeout; +extern int sysctl_llc_station_ack_timeout; + +static struct ctl_table llc2_timeout_table[] = { + { + .ctl_name = NET_LLC2_ACK_TIMEOUT, + .procname = "ack", + .data = &sysctl_llc2_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_BUSY_TIMEOUT, + .procname = "busy", + .data = &sysctl_llc2_busy_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_P_TIMEOUT, + .procname = "p", + .data = &sysctl_llc2_p_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_REJ_TIMEOUT, + .procname = "rej", + .data = &sysctl_llc2_rej_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc_station_table[] = { + { + .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, + .procname = "ack_timeout", + .data = &sysctl_llc_station_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc2_dir_timeout_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "timeout", + .mode = 0555, + .child = llc2_timeout_table, + }, + { 0 }, +}; + +static struct ctl_table llc_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "llc2", + .mode = 0555, + .child = llc2_dir_timeout_table, + }, + { + .ctl_name = NET_LLC_STATION, + .procname = "station", + .mode = 0555, + .child = llc_station_table, + }, + { 0 }, +}; + +static struct ctl_table llc_dir_table[] = { + { + .ctl_name = NET_LLC, + .procname = "llc", + .mode = 0555, + .child = llc_table, + }, + { 0 }, +}; + +static struct ctl_table llc_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = llc_dir_table, + }, + { 0 }, +}; + +static struct ctl_table_header *llc_table_header; + +int __init llc_sysctl_init(void) +{ + llc_table_header = register_sysctl_table(llc_root_table, 1); + + return llc_table_header ? 0 : -ENOMEM; +} + +void llc_sysctl_exit(void) +{ + if (llc_table_header) { + unregister_sysctl_table(llc_table_header); + llc_table_header = NULL; + } +} -- cgit v1.2.3 From 249ff1c6d35fd32ca945967c3f0b948210a96baa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:32:10 -0300 Subject: [LLC]: Use some more likely/unlikely Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 10 ++++------ net/llc/llc_c_ac.c | 52 +++++++++++++++++++++++++-------------------------- net/llc/llc_if.c | 9 +++------ net/llc/llc_output.c | 2 +- net/llc/llc_s_ac.c | 10 +++++----- net/llc/llc_station.c | 6 +++--- 6 files changed, 42 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ef125345a2db..7e9cf3214b88 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -116,12 +116,12 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) struct llc_sock* llc = llc_sk(sk); int rc = 0; - if (llc_data_accept_state(llc->state) || llc->p_flag) { + if (unlikely(llc_data_accept_state(llc->state) || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); rc = llc_ui_wait_for_busy_core(sk, timeout); } - if (!rc) + if (unlikely(!rc)) rc = llc_build_and_send_pkt(sk, skb); return rc; } @@ -762,15 +762,13 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); - if (rc) - dprintk("%s: llc_ui_send_data failed: %d\n", __FUNCTION__, rc); out: - if (rc) + if (rc) { kfree_skb(skb); release: - if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __FUNCTION__, llc->laddr.lsap, llc->daddr.lsap, rc); + } release_sock(sk); return rc ? : copied; } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index a4daa91003dd..9d9b6c8aeb44 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -221,7 +221,7 @@ int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_disc_cmd(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); llc_conn_ac_set_p_flag_1(sk, skb); @@ -248,7 +248,7 @@ int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_dm_rsp(nskb, f_bit); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -272,7 +272,7 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_dm_rsp(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -305,7 +305,7 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -331,7 +331,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_frmr_rsp(nskb, pdu, 0, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -360,7 +360,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -381,7 +381,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -398,7 +398,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -415,7 +415,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -447,7 +447,7 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) + if (likely(!rc)) llc_conn_send_pdu(sk, nskb); else kfree_skb(skb); @@ -482,7 +482,7 @@ int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rej_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -506,7 +506,7 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -530,7 +530,7 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -554,7 +554,7 @@ int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rnr_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -578,7 +578,7 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -602,7 +602,7 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -638,7 +638,7 @@ int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -662,7 +662,7 @@ int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -687,7 +687,7 @@ int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -711,7 +711,7 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -735,7 +735,7 @@ int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -759,7 +759,7 @@ int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -796,7 +796,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_sabme_cmd(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, dmac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); llc_conn_set_p_flag(sk, 1); @@ -824,7 +824,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_ua_rsp(nskb, f_bit); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -924,7 +924,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -978,7 +978,7 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, llc->ack_pf, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f84f66018e4..764dbd704051 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -47,14 +47,11 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) int rc = -ECONNABORTED; struct llc_sock *llc = llc_sk(sk); - if (llc->state == LLC_CONN_STATE_ADM) + if (unlikely(llc->state == LLC_CONN_STATE_ADM)) goto out; rc = -EBUSY; - if (llc_data_accept_state(llc->state)) { /* data_conn_refuse */ - llc->failed_data_req = 1; - goto out; - } - if (llc->p_flag) { + if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ + llc->p_flag)) { llc->failed_data_req = 1; goto out; } diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index ab5784cf163e..b4d55b6abb67 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -98,7 +98,7 @@ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, dsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 270b2f2030cd..6acdebf03b2b 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,7 +58,7 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -81,7 +81,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -110,7 +110,7 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; @@ -134,7 +134,7 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -155,7 +155,7 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 2d764b0382ce..ec4693fe312f 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -261,7 +261,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, llc_station_mac_sa); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: @@ -285,7 +285,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: @@ -309,7 +309,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: -- cgit v1.2.3 From afdbe35787ea3390af0f1dd38b3dd9d8a8d313e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:37:07 -0300 Subject: [LLC]: Use sk_wait_data Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7e9cf3214b88..f536369cdb52 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -39,7 +39,6 @@ static struct proto_ops llc_ui_ops; static int llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); -static int llc_ui_wait_for_data(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 @@ -524,16 +523,19 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout) return timeout; } -static int llc_ui_wait_for_data(struct sock *sk, long timeout) +static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { DEFINE_WAIT(wait); - int rc = 0; + struct llc_sock *llc = llc_sk(sk); + int rc; while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || - (!skb_queue_empty(&sk->sk_receive_queue)))) + (!llc_data_accept_state(llc->state) && + !llc->p_flag))) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -541,34 +543,36 @@ static int llc_ui_wait_for_data(struct sock *sk, long timeout) rc = -EAGAIN; if (!timeout) break; - rc = 0; } finish_wait(sk->sk_sleep, &wait); return rc; } -static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) +int llc_wait_data(struct sock *sk, long timeo) { - DEFINE_WAIT(wait); - struct llc_sock *llc = llc_sk(sk); int rc; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + /* + * POSIX 1003.1g mandates this order. + */ + if (sk->sk_err) { + rc = sock_error(sk); + break; + } rc = 0; - if (sk_wait_event(sk, &timeout, - (sk->sk_shutdown & RCV_SHUTDOWN) || - (!llc_data_accept_state(llc->state) && - !llc->p_flag))) + if (sk->sk_shutdown & RCV_SHUTDOWN) break; - rc = -ERESTARTSYS; + rc = -EAGAIN; + if (!timeo) + break; + rc = sock_intr_errno(timeo); if (signal_pending(current)) break; - rc = -EAGAIN; - if (!timeout) + rc = 0; + if (sk_wait_data(sk, &timeo)) break; } - finish_wait(sk->sk_sleep, &wait); return rc; } @@ -599,7 +603,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) goto out; /* wait for a connection to arrive. */ if (skb_queue_empty(&sk->sk_receive_queue)) { - rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo); + rc = llc_wait_data(sk, sk->sk_rcvtimeo); if (rc) goto out; } @@ -658,7 +662,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); if (skb_queue_empty(&sk->sk_receive_queue)) { - rc = llc_ui_wait_for_data(sk, sock_rcvtimeo(sk, noblock)); + rc = llc_wait_data(sk, sock_rcvtimeo(sk, noblock)); if (rc) goto out; } -- cgit v1.2.3 From 04e4223f44b89e50f275cb6b95a58ebe2c4909be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:40:59 -0300 Subject: [LLC]: Do better struct sock accounting on skbs Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 6 ++++-- net/llc/af_llc.c | 2 -- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 14 +++++++++----- net/llc/llc_sap.c | 8 ++++---- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 6dd4cdccca42..9dcfcf4a4abb 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -12,11 +12,13 @@ * See the GNU General Public License for more details. */ struct llc_sap; -struct sk_buff; struct net_device; +struct sk_buff; +struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); -extern void llc_save_primitive(struct sk_buff* skb, unsigned char prim); +extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, + unsigned char prim); extern struct sk_buff *llc_alloc_frame(struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f536369cdb52..ad9aad807aa8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -628,7 +628,6 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; sk->sk_ack_backlog--; - skb->sk = NULL; dprintk("%s: ok success on %02X, client on %02X\n", __FUNCTION__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: @@ -740,7 +739,6 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); if (!skb) goto release; - skb->sk = sk; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 9d9b6c8aeb44..c1e75103189a 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1335,7 +1335,7 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - skb->sk = sk; + skb_set_owner_r(skb, sk); ev->type = type; llc_process_tmr_ev(sk, skb); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index d3783f8ee481..9f0e10e0978d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -90,8 +90,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) switch (ev->ind_prim) { case LLC_DATA_PRIM: - llc_save_primitive(skb, LLC_DATA_PRIM); - if (sock_queue_rcv_skb(sk, skb)) { + llc_save_primitive(sk, skb, LLC_DATA_PRIM); + if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* * shouldn't happen */ @@ -103,6 +103,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) case LLC_CONN_PRIM: { struct sock *parent = skb->sk; + skb_orphan(skb); + /* + * Set the skb->sk to the new struct sock, so that at accept + * type the upper layer can get the newly created struct sock. + */ skb->sk = sk; skb_queue_tail(&parent->sk_receive_queue, skb); sk->sk_state_change(parent); @@ -702,10 +707,9 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); llc_sap_add_socket(sap, sk); sock_hold(sk); + skb_set_owner_r(skb, parent); sock_put(parent); - skb->sk = parent; - } else - skb->sk = sk; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 0adaa289bf0a..9f064b3a4335 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -45,12 +45,12 @@ struct sk_buff *llc_alloc_frame(struct net_device *dev) return skb; } -void llc_save_primitive(struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { struct sockaddr_llc *addr = llc_ui_skb_cb(skb); /* save primitive for use by the user. */ - addr->sllc_family = skb->sk->sk_family; + addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; @@ -190,7 +190,7 @@ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) if (skb->sk->sk_state == TCP_LISTEN) kfree_skb(skb); else { - llc_save_primitive(skb, ev->prim); + llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb)) @@ -309,7 +309,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb->sk = sk; + skb_set_owner_r(skb, sk); llc_sap_rcv(sap, skb); sock_put(sk); } else -- cgit v1.2.3 From 6e2144b76840be09924de1626e2dcd7b315f75b3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:43:05 -0300 Subject: [LLC]: Use refcounting with struct llc_sap Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc.h | 16 +++++++++++++++- include/net/llc_conn.h | 1 - net/802/p8022.c | 2 +- net/802/psnap.c | 2 +- net/llc/af_llc.c | 11 ++++++----- net/llc/llc_c_ac.c | 1 + net/llc/llc_conn.c | 28 ++-------------------------- net/llc/llc_core.c | 34 +++++++++++++++++++++++----------- net/llc/llc_input.c | 9 +++++++-- 9 files changed, 56 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/include/net/llc.h b/include/net/llc.h index 8b8e2be289b1..93e5b443a9a7 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -17,6 +17,8 @@ #include #include +#include + struct net_device; struct packet_type; struct sk_buff; @@ -44,6 +46,7 @@ struct llc_sap { unsigned char state; unsigned char p_bit; unsigned char f_bit; + atomic_t refcnt; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -81,7 +84,18 @@ extern struct llc_sap *llc_sap_open(unsigned char lsap, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)); -extern void llc_sap_close(struct llc_sap *sap); +static inline void llc_sap_hold(struct llc_sap *sap) +{ + atomic_inc(&sap->refcnt); +} + +static inline void llc_sap_put(struct llc_sap *sap) +{ + extern void llc_sap_close(struct llc_sap *sap); + + if (atomic_dec_and_test(&sap->refcnt)) + llc_sap_close(sap); +} extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 8a8ff4810135..b2889218c76a 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -115,5 +115,4 @@ extern void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); extern u8 llc_data_accept_state(u8 state); extern void llc_build_offset_table(void); -extern int llc_release_sockets(struct llc_sap *sap); #endif /* LLC_CONN_H */ diff --git a/net/802/p8022.c b/net/802/p8022.c index b24817c63ca8..2530f35241cd 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -56,7 +56,7 @@ struct datalink_proto *register_8022_client(unsigned char type, void unregister_8022_client(struct datalink_proto *proto) { - llc_sap_close(proto->sap); + llc_sap_put(proto->sap); kfree(proto); } diff --git a/net/802/psnap.c b/net/802/psnap.c index ab80b1fab53c..4d638944d933 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -106,7 +106,7 @@ module_init(snap_init); static void __exit snap_exit(void) { - llc_sap_close(snap_sap); + llc_sap_put(snap_sap); } module_exit(snap_exit); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ad9aad807aa8..a75b8f2aab19 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -21,6 +21,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include @@ -188,10 +189,6 @@ static int llc_ui_release(struct socket *sock) if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); release_sock(sk); - if (llc->sap && hlist_empty(&llc->sap->sk_list.list)) { - llc_release_sockets(llc->sap); - llc_sap_close(llc->sap); - } if (llc->dev) dev_put(llc->dev); sock_put(sk); @@ -220,6 +217,7 @@ static int llc_ui_autoport(void) llc_ui_sap_last_autoport = i + 2; goto out; } + llc_sap_put(sap); } llc_ui_sap_last_autoport = LLC_SAP_DYN_START; tries++; @@ -310,6 +308,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; + llc_sap_hold(sap); } else { struct llc_addr laddr, daddr; struct sock *ask; @@ -326,7 +325,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) ask = llc_lookup_established(sap, &daddr, &laddr); if (ask) { sock_put(ask); - goto out; + goto out_put; } } llc->laddr.lsap = addr->sllc_sap; @@ -336,6 +335,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; +out_put: + llc_sap_put(sap); out: return rc; } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index c1e75103189a..05236c2cbb9e 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -75,6 +75,7 @@ int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb) llc->dev = skb->dev; ev->ind_prim = LLC_CONN_PRIM; rc = 0; + llc_sap_put(sap); } return rc; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 9f0e10e0978d..ab9e6d7e2875 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -633,6 +633,7 @@ static int llc_find_offset(int state, int ev_type) */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { + llc_sap_hold(sap); write_lock_bh(&sap->sk_list.lock); llc_sk(sk)->sap = sap; sk_add_node(sk, &sap->sk_list.list); @@ -652,6 +653,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) write_lock_bh(&sap->sk_list.lock); sk_del_node_init(sk); write_unlock_bh(&sap->sk_list.lock); + llc_sap_put(sap); } /** @@ -730,32 +732,6 @@ drop: static atomic_t llc_sock_nr; #endif -/** - * llc_release_sockets - releases all sockets in a sap - * @sap: sap to release its sockets - * - * Releases all connections of a sap. Returns 0 if all actions complete - * successfully, nonzero otherwise - */ -int llc_release_sockets(struct llc_sap *sap) -{ - int rc = 0; - struct sock *sk; - struct hlist_node *node; - - write_lock_bh(&sap->sk_list.lock); - - sk_for_each(sk, node, &sap->sk_list.list) { - llc_sk(sk)->state = LLC_CONN_STATE_TEMP; - - if (llc_send_disc(sk)) - rc = 1; - } - - write_unlock_bh(&sap->sk_list.lock); - return rc; -} - /** * llc_backlog_rcv - Processes rx frames and expired timers. * @sk: LLC sock (p8022 connection) diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 9727455bf0e7..9ccff1266b26 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -40,6 +40,7 @@ static struct llc_sap *llc_sap_alloc(void) sap->state = LLC_SAP_STATE_ACTIVE; memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); + atomic_set(&sap->refcnt, 1); } return sap; } @@ -52,9 +53,7 @@ static struct llc_sap *llc_sap_alloc(void) */ static void llc_add_sap(struct llc_sap *sap) { - write_lock_bh(&llc_sap_list_lock); list_add_tail(&sap->node, &llc_sap_list); - write_unlock_bh(&llc_sap_list_lock); } /** @@ -70,11 +69,25 @@ static void llc_del_sap(struct llc_sap *sap) write_unlock_bh(&llc_sap_list_lock); } +struct llc_sap *__llc_sap_find(unsigned char sap_value) +{ + struct llc_sap* sap; + + list_for_each_entry(sap, &llc_sap_list, node) + if (sap->laddr.lsap == sap_value) + goto out; + sap = NULL; +out: + return sap; +} + /** * llc_sap_find - searchs a SAP in station * @sap_value: sap to be found * * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * If the sap is found it will be refcounted and the user will have to do + * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) @@ -82,11 +95,9 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) struct llc_sap* sap; read_lock_bh(&llc_sap_list_lock); - list_for_each_entry(sap, &llc_sap_list, node) - if (sap->laddr.lsap == sap_value) - goto out; - sap = NULL; -out: + sap = __llc_sap_find(sap_value); + if (sap) + llc_sap_hold(sap); read_unlock_bh(&llc_sap_list_lock); return sap; } @@ -106,19 +117,20 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct packet_type *pt, struct net_device *orig_dev)) { - struct llc_sap *sap = llc_sap_find(lsap); + struct llc_sap *sap = NULL; - if (sap) { /* SAP already exists */ - sap = NULL; + write_lock_bh(&llc_sap_list_lock); + if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; - } sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; + llc_sap_hold(sap); llc_add_sap(sap); out: + write_unlock_bh(&llc_sap_list_lock); return sap; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 60c1acac7c97..789eec426451 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -166,17 +166,22 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ if (sap->rcv_func) { sap->rcv_func(skb, dev, pt, orig_dev); - goto out; + goto out_put; } dest = llc_pdu_type(skb); if (unlikely(!dest || !llc_type_handlers[dest - 1])) - goto drop; + goto drop_put; llc_type_handlers[dest - 1](sap, skb); +out_put: + llc_sap_put(sap); out: return 0; drop: kfree_skb(skb); goto out; +drop_put: + kfree_skb(skb); + goto out_put; handle_station: if (!llc_station_handler) goto drop; -- cgit v1.2.3 From cf309e3fb863b7a245b91f816193957f6daf786f Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Thu, 22 Sep 2005 04:44:55 -0300 Subject: [LLC]: Fix for Bugzilla ticket #5156 Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/core/dev.c | 2 ++ net/llc/af_llc.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c01511e3d0c1..37c881070963 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -574,6 +574,8 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) return dev; } +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index a75b8f2aab19..0607cd3fd12c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -296,6 +296,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + rc = -ENODEV; + rtnl_lock(); + llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + rtnl_unlock(); + if (!llc->dev) + goto out; if (!addr->sllc_sap) { rc = -EUSERS; addr->sllc_sap = llc_ui_autoport(); -- cgit v1.2.3 From 5564af21ae7900889c5151e5b16bd42cdda11a77 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Thu, 22 Sep 2005 04:46:44 -0300 Subject: [LLC]: Fix for Bugzilla ticket #5157 Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/af_llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0607cd3fd12c..3361ae900e2b 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -418,9 +418,9 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, rc = llc_ui_autobind(sock, addr); if (rc) goto out; - llc->daddr.lsap = addr->sllc_sap; - memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); } + llc->daddr.lsap = addr->sllc_sap; + memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); -- cgit v1.2.3 From 096f0eb1dff326ddebfedeb128fb48d5b7ca75e1 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Thu, 22 Sep 2005 04:48:46 -0300 Subject: [LLC]: Fix llc_fixup_skb() bug llc_fixup_skb() had a bug dropping 3 bytes packets (like UA frames). Token ring doesn't pad these frames. Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/llc/llc_input.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 789eec426451..8f3addf0724c 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -99,15 +99,19 @@ out: static inline int llc_fixup_skb(struct sk_buff *skb) { u8 llc_len = 2; - struct llc_pdu_sn *pdu; + struct llc_pdu_un *pdu; if (unlikely(!pskb_may_pull(skb, sizeof(*pdu)))) return 0; - pdu = (struct llc_pdu_sn *)skb->data; + pdu = (struct llc_pdu_un *)skb->data; if ((pdu->ctrl_1 & LLC_PDU_TYPE_MASK) == LLC_PDU_TYPE_U) llc_len = 1; llc_len += 2; + + if (unlikely(!pskb_may_pull(skb, llc_len))) + return 0; + skb->h.raw += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { -- cgit v1.2.3 From 0519d8fbabc4eb215a8263f29143ccd86c328157 Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Thu, 22 Sep 2005 04:51:56 -0300 Subject: [TR]: Set correct frame type for SNAP packets Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- net/802/tr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index 1bb7dc1b85cd..1eaa3d19d8bf 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -238,7 +238,7 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) return trllc->ethertype; } - return ntohs(ETH_P_802_2); + return ntohs(ETH_P_TR_802_2); } /* -- cgit v1.2.3 From 2928c19e1086e2f1e90d05931437ab6f1e4cfdc8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 05:14:33 -0300 Subject: [LLC]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc.h | 13 +++++++++++-- include/net/llc_conn.h | 3 ++- net/llc/af_llc.c | 5 +---- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 3 ++- net/llc/llc_core.c | 2 +- net/llc/sysctl_net_llc.c | 7 +------ 7 files changed, 19 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/llc.h b/include/net/llc.h index 93e5b443a9a7..1adb2ef3f6f7 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -89,10 +89,10 @@ static inline void llc_sap_hold(struct llc_sap *sap) atomic_inc(&sap->refcnt); } +extern void llc_sap_close(struct llc_sap *sap); + static inline void llc_sap_put(struct llc_sap *sap) { - extern void llc_sap_close(struct llc_sap *sap); - if (atomic_dec_and_test(&sap->refcnt)) llc_sap_close(sap); } @@ -102,6 +102,9 @@ extern struct llc_sap *llc_sap_find(unsigned char sap_value); extern int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, unsigned char *dmac, unsigned char dsap); +extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); +extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); + extern int llc_station_init(void); extern void llc_station_exit(void); @@ -115,6 +118,12 @@ extern void llc_proc_exit(void); #ifdef CONFIG_SYSCTL extern int llc_sysctl_init(void); extern void llc_sysctl_exit(void); + +extern int sysctl_llc2_ack_timeout; +extern int sysctl_llc2_busy_timeout; +extern int sysctl_llc2_p_timeout; +extern int sysctl_llc2_rej_timeout; +extern int sysctl_llc_station_ack_timeout; #else #define llc_sysctl_init() (0) #define llc_sysctl_exit() do { } while(0) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index b2889218c76a..e44f494bbef1 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -92,7 +92,8 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, int priority, struct proto *prot); +extern struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot); extern void llc_sk_free(struct sock *sk); extern void llc_sk_reset(struct sock *sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 3361ae900e2b..7aa51eb79b13 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -555,7 +555,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) return rc; } -int llc_wait_data(struct sock *sk, long timeo) +static int llc_wait_data(struct sock *sk, long timeo) { int rc; @@ -1003,9 +1003,6 @@ static struct proto_ops llc_ui_ops = { .sendpage = sock_no_sendpage, }; -extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); -extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); - static char llc_proc_err_msg[] __initdata = KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; static char llc_sysctl_err_msg[] __initdata = diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 05236c2cbb9e..8f7b46d20638 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1321,7 +1321,7 @@ int llc_conn_ac_set_vs_nr(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) { llc_sk(sk)->vS = (llc_sk(sk)->vS + 1) % 128; return 0; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ab9e6d7e2875..76f94e0d840d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -819,7 +819,8 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, int priority, struct proto *prot) +struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot) { struct sock *sk = sk_alloc(family, priority, prot, 1); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 9ccff1266b26..ab0fcd32fd84 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -69,7 +69,7 @@ static void llc_del_sap(struct llc_sap *sap) write_unlock_bh(&llc_sap_list_lock); } -struct llc_sap *__llc_sap_find(unsigned char sap_value) +static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap* sap; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 4d99d2f27a21..d1eaddb13633 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -8,17 +8,12 @@ #include #include #include +#include #ifndef CONFIG_SYSCTL #error This file should not be compiled without CONFIG_SYSCTL defined #endif -extern int sysctl_llc2_ack_timeout; -extern int sysctl_llc2_busy_timeout; -extern int sysctl_llc2_p_timeout; -extern int sysctl_llc2_rej_timeout; -extern int sysctl_llc_station_ack_timeout; - static struct ctl_table llc2_timeout_table[] = { { .ctl_name = NET_LLC2_ACK_TIMEOUT, -- cgit v1.2.3 From d389424e00f9097cd24b3df4ca0ab7221f140eeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 07:57:21 -0300 Subject: [LLC]: Fix the accept path Borrowing the structure of TCP/IP for this. On the receive of new connections I was bh_lock_socking the _new_ sock, not the listening one, duh, now it survives the ssh connections storm I've been using to test this specific bug. Also fixes send side skb sock accounting. Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 3 +- net/llc/llc_c_ac.c | 64 +++++++++-------------- net/llc/llc_conn.c | 142 +++++++++++++++++++++++++++++++++----------------- net/llc/llc_if.c | 2 + net/llc/llc_s_ac.c | 4 +- net/llc/llc_sap.c | 4 +- net/llc/llc_station.c | 6 +-- 7 files changed, 132 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 9dcfcf4a4abb..2c56dbece729 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -19,7 +19,8 @@ struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(struct net_device *dev); +extern struct sk_buff *llc_alloc_frame(struct sock *sk, + struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 8f7b46d20638..b0bcfb1f12dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -60,24 +60,10 @@ int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb) { - int rc = -ENOTCONN; - u8 dsap; - struct llc_sap *sap; - - llc_pdu_decode_dsap(skb, &dsap); - sap = llc_sap_find(dsap); - if (sap) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); + struct llc_conn_state_ev *ev = llc_conn_ev(skb); - llc_pdu_decode_sa(skb, llc->daddr.mac); - llc_pdu_decode_da(skb, llc->laddr.mac); - llc->dev = skb->dev; - ev->ind_prim = LLC_CONN_PRIM; - rc = 0; - llc_sap_put(sap); - } - return rc; + ev->ind_prim = LLC_CONN_PRIM; + return 0; } int llc_conn_ac_conn_confirm(struct sock *sk, struct sk_buff *skb) @@ -213,7 +199,7 @@ int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -238,7 +224,7 @@ int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -264,7 +250,7 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -297,7 +283,7 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(llc->dev); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -321,7 +307,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -351,7 +337,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(llc->dev); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -439,7 +425,7 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -474,7 +460,7 @@ int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -498,7 +484,7 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -522,7 +508,7 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -546,7 +532,7 @@ int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -570,7 +556,7 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -594,7 +580,7 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -630,7 +616,7 @@ int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -654,7 +640,7 @@ int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -678,7 +664,7 @@ int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -703,7 +689,7 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -727,7 +713,7 @@ int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -751,7 +737,7 @@ int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -785,7 +771,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -814,7 +800,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { @@ -970,7 +956,7 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 76f94e0d840d..e10ce5adb104 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -58,7 +58,7 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { int rc; - struct llc_sock *llc = llc_sk(sk); + struct llc_sock *llc = llc_sk(skb->sk); struct llc_conn_state_ev *ev = llc_conn_ev(skb); /* @@ -68,7 +68,10 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) */ skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; - rc = llc_conn_service(sk, skb); /* sending event to state machine */ + /* + * Send event to state machine + */ + rc = llc_conn_service(skb->sk, skb); if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __FUNCTION__); goto out_kfree_skb; @@ -100,18 +103,14 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } break; - case LLC_CONN_PRIM: { - struct sock *parent = skb->sk; - - skb_orphan(skb); + case LLC_CONN_PRIM: /* - * Set the skb->sk to the new struct sock, so that at accept - * type the upper layer can get the newly created struct sock. + * Can't be sock_queue_rcv_skb, because we have to leave the + * skb->sk pointing to the newly created struct sock in + * llc_conn_handler. -acme */ - skb->sk = sk; - skb_queue_tail(&parent->sk_receive_queue, skb); - sk->sk_state_change(parent); - } + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_state_change(sk); break; case LLC_DISC_PRIM: sock_hold(sk); @@ -475,7 +474,7 @@ static int llc_exec_conn_trans_actions(struct sock *sk, } /** - * llc_lookup_established - Finds connection for the remote/local sap/mac + * __llc_lookup_established - Finds connection for the remote/local sap/mac * @sap: SAP * @daddr: address of remote LLC (MAC + SAP) * @laddr: address of local LLC (MAC + SAP) @@ -483,14 +482,16 @@ static int llc_exec_conn_trans_actions(struct sock *sk, * Search connection list of the SAP and finds connection using the remote * mac, remote sap, local mac, and local sap. Returns pointer for * connection found, %NULL otherwise. + * Caller has to make sure local_bh is disabled. */ -struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, - struct llc_addr *laddr) +static struct sock *__llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -504,10 +505,22 @@ struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +struct sock *llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk; + + local_bh_disable(); + sk = __llc_lookup_established(sap, daddr, laddr); + local_bh_enable(); + return sk; +} + /** * llc_lookup_listener - Finds listener for local MAC + SAP * @sap: SAP @@ -516,6 +529,7 @@ found: * Search connection list of the SAP and finds connection listening on * local mac, and local sap. Returns pointer for parent socket found, * %NULL otherwise. + * Caller has to make sure local_bh is disabled. */ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) @@ -523,7 +537,7 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -537,10 +551,19 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +static struct sock *__llc_lookup(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk = __llc_lookup_established(sap, daddr, laddr); + + return sk ? : llc_lookup_listener(sap, laddr); +} + /** * llc_data_accept_state - designates if in this state data can be sent. * @state: state of connection. @@ -666,15 +689,34 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); - if (!llc->dev) - llc->dev = skb->dev; ev->type = LLC_CONN_EV_TYPE_PDU; ev->reason = 0; return llc_conn_state_process(sk, skb); } +static struct sock *llc_create_incoming_sock(struct sock *sk, + struct net_device *dev, + struct llc_addr *saddr, + struct llc_addr *daddr) +{ + struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + sk->sk_prot); + struct llc_sock *newllc, *llc = llc_sk(sk); + + if (!newsk) + goto out; + newllc = llc_sk(newsk); + memcpy(&newllc->laddr, daddr, sizeof(newllc->laddr)); + memcpy(&newllc->daddr, saddr, sizeof(newllc->daddr)); + newllc->dev = dev; + dev_hold(dev); + llc_sap_add_socket(llc->sap, newsk); + llc_sap_hold(llc->sap); +out: + return newsk; +} + void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr saddr, daddr; @@ -685,34 +727,35 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_da(skb, daddr.mac); llc_pdu_decode_dsap(skb, &daddr.lsap); - sk = llc_lookup_established(sap, &saddr, &daddr); - if (!sk) { + sk = __llc_lookup(sap, &saddr, &daddr); + if (!sk) + goto drop; + + bh_lock_sock(sk); + /* + * This has to be done here and not at the upper layer ->accept + * method because of the way the PROCOM state machine works: + * it needs to set several state variables (see, for instance, + * llc_adm_actions_2 in net/llc/llc_c_st.c) and send a packet to + * the originator of the new connection, and this state has to be + * in the newly created struct sock private area. -acme + */ + if (unlikely(sk->sk_state == TCP_LISTEN)) { + struct sock *newsk = llc_create_incoming_sock(sk, skb->dev, + &saddr, &daddr); + if (!newsk) + goto drop_unlock; + skb_set_owner_r(skb, newsk); + } else { /* - * Didn't find an active connection; verify if there - * is a listening socket for this llc addr + * Can't be skb_set_owner_r, this will be done at the + * llc_conn_state_process function, later on, when we will use + * skb_queue_rcv_skb to send it to upper layers, this is + * another trick required to cope with how the PROCOM state + * machine works. -acme */ - struct llc_sock *llc; - struct sock *parent = llc_lookup_listener(sap, &daddr); - - if (!parent) { - dprintk("llc_lookup_listener failed!\n"); - goto drop; - } - - sk = llc_sk_alloc(parent->sk_family, GFP_ATOMIC, parent->sk_prot); - if (!sk) { - sock_put(parent); - goto drop; - } - llc = llc_sk(sk); - memcpy(&llc->laddr, &daddr, sizeof(llc->laddr)); - memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); - llc_sap_add_socket(sap, sk); - sock_hold(sk); - skb_set_owner_r(skb, parent); - sock_put(parent); + skb->sk = sk; } - bh_lock_sock(sk); if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); else { @@ -720,11 +763,16 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_set_backlog_type(skb, LLC_PACKET); sk_add_backlog(sk, skb); } +out: bh_unlock_sock(sk); sock_put(sk); return; drop: kfree_skb(skb); + return; +drop_unlock: + kfree_skb(skb); + goto out; } #undef LLC_REFCNT_DEBUG diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 764dbd704051..ba90f7f0801a 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -107,6 +107,7 @@ int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap) ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_CONN_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; + skb_set_owner_w(skb, sk); rc = llc_conn_state_process(sk, skb); } out_put: @@ -141,6 +142,7 @@ int llc_send_disc(struct sock *sk) skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; + skb_set_owner_w(skb, sk); sk->sk_state = TCP_CLOSING; ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 6acdebf03b2b..bb3580fb8cfe 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,7 +103,7 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, @@ -148,7 +148,7 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 9f064b3a4335..e6d538937f93 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -31,7 +31,7 @@ * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(struct net_device *dev) +struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) { struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); @@ -41,6 +41,8 @@ struct sk_buff *llc_alloc_frame(struct net_device *dev) skb->protocol = htons(ETH_P_802_2); skb->dev = dev; skb->mac.raw = skb->head; + if (sk != NULL) + skb_set_owner_w(skb, sk); } return skb; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index ec4693fe312f..f37dbf8ef126 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -254,7 +254,7 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; @@ -275,7 +275,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(skb->dev); + struct sk_buff* nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; @@ -299,7 +299,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; -- cgit v1.2.3 From 8420e1b541fe92aee1d8d4d25d9e33eaca756a7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 08:29:08 -0300 Subject: [LLC]: fix llc_ui_recvmsg, making it behave like tcp_recvmsg In fact it is an exact copy of the parts that makes sense to LLC :-) Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_conn.h | 1 + net/llc/af_llc.c | 180 ++++++++++++++++++++++++++++++++++++++++--------- net/llc/llc_conn.c | 2 +- net/llc/llc_proc.c | 2 +- net/llc/llc_sap.c | 5 +- 5 files changed, 154 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index e44f494bbef1..54852ff6033b 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; u8 first_pdu_Ns; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7aa51eb79b13..59d02cbbeb9e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -648,53 +648,167 @@ out: * llc_ui_recvmsg - copy received data to the socket user. * @sock: Socket to copy data from. * @msg: Various user space related information. - * @size: Size of user buffer. + * @len: Size of user buffer. * @flags: User specified flags. * * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) + struct msghdr *msg, size_t len, int flags) { - struct sock *sk = sock->sk; struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; - struct sk_buff *skb; + const int nonblock = flags & MSG_DONTWAIT; + struct sk_buff *skb = NULL; + struct sock *sk = sock->sk; + struct llc_sock *llc = llc_sk(sk); size_t copied = 0; - int rc = -ENOMEM; - int noblock = flags & MSG_DONTWAIT; + u32 peek_seq = 0; + u32 *seq; + unsigned long used; + int target; /* Read at least this many bytes */ + long timeo; - dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__, - llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); - if (skb_queue_empty(&sk->sk_receive_queue)) { - rc = llc_wait_data(sk, sock_rcvtimeo(sk, noblock)); - if (rc) - goto out; - } - skb = skb_dequeue(&sk->sk_receive_queue); - if (!skb) /* shutdown */ + copied = -ENOTCONN; + if (sk->sk_state == TCP_LISTEN) goto out; - copied = skb->len; - if (copied > size) - copied = size; - rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (rc) - goto dgram_free; - if (skb->len > copied) { - skb_pull(skb, copied); - skb_queue_head(&sk->sk_receive_queue, skb); - } - if (uaddr) - memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); - msg->msg_namelen = sizeof(*uaddr); - if (!skb->next) { -dgram_free: - kfree_skb(skb); - } + + timeo = sock_rcvtimeo(sk, nonblock); + + seq = &llc->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = llc->copied_seq; + seq = &peek_seq; + } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + copied = 0; + + do { + u32 offset; + + /* + * We need to check signals first, to get correct SIGURG + * handling. FIXME: Need to check this doesn't impact 1003.1g + * and move it down to the bottom of the loop + */ + if (signal_pending(current)) { + if (copied) + break; + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + + /* Next get a buffer. */ + + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + offset = *seq; + goto found_ok_skb; + } + /* Well, if we have backlog, try to process it now yet. */ + + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == TCP_CLOSE) { + if (!sock_flag(sk, SOCK_DONE)) { + /* + * This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + } + + if (copied >= target) { /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { + if (net_ratelimit()) + printk(KERN_DEBUG "LLC(%s:%d): Application " + "bug, race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = llc->copied_seq; + } + continue; + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + int rc = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (rc) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + } + + *seq += used; + copied += used; + len -= used; + + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb); + *seq = 0; + } + } while (len > 0); + + /* + * According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. -ANK + * But... af_llc still doesn't have separate sets of methods for + * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will + * eventually fix this tho :-) -acme + */ + if (sk->sk_type == SOCK_DGRAM) + goto copy_uaddr; out: release_sock(sk); - return rc ? : copied; + return copied; +copy_uaddr: + if (uaddr != NULL && skb != NULL) { + memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); + msg->msg_namelen = sizeof(*uaddr); + } + goto out; } /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index e10ce5adb104..042b24a8ca4c 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -120,8 +120,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_socket->state = SS_UNCONNECTED; sk->sk_state = TCP_CLOSE; if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } } kfree_skb(skb); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 36e8db3fa1a2..bd531cb235a7 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,7 +134,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc), + atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e6d538937f93..4029ceee9b91 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -49,9 +49,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { - struct sockaddr_llc *addr = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr; + if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ + return; /* save primitive for use by the user. */ + addr = llc_ui_skb_cb(skb); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; -- cgit v1.2.3 From 83ca28befc43e93849e79c564cda10e39d983e75 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Sep 2005 23:32:56 -0700 Subject: [TCP]: Adjust Reno SACK estimate in tcp_fragment Since the introduction of TSO pcount a year ago, it has been possible for tcp_fragment() to cause packets_out to decrease. Prior to that, tcp_retrans_try_collapse() was the only way for that to happen on the retransmission path. When this happens with Reno, it is possible for sasked_out to become invalid because it is only an estimate and not tied to any particular packet on the retransmission queue. Therefore we need to adjust sacked_out as well as left_out in the Reno case. The following patch does exactly that. This bug is pretty difficult to trigger in practice though since you need a SACKless peer with a retransmission that occurs just as the cached MTU value expires. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5dd6dd7d091e..d6e3d269e906 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -509,7 +509,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tp->lost_out -= diff; tp->left_out -= diff; } + if (diff > 0) { + /* Adjust Reno SACK estimate. */ + if (!tp->rx_opt.sack_ok) { + tp->sacked_out -= diff; + if ((int)tp->sacked_out < 0) + tp->sacked_out = 0; + tcp_sync_left_out(tp); + } + tp->fackets_out -= diff; if ((int)tp->fackets_out < 0) tp->fackets_out = 0; -- cgit v1.2.3 From 2a7bc3c94cbca32d3d6e03e7d5a71662f591d28a Mon Sep 17 00:00:00 2001 From: Vlad Drukker Date: Thu, 22 Sep 2005 23:35:34 -0700 Subject: [BRIDGE]: TSO fix in br_dev_queue_push_xmit Signed-off-by: Vlad Drukker Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 069253f830c1..2d24fb400e0c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,7 +31,8 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - if (skb->len > skb->dev->mtu) + /* drop mtu oversized packets except tso */ + if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.3 From 0ae5d253adcc467b1c52b512bbca9419eb438409 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:44:58 -0700 Subject: [NETFILTER] fix DEBUG statement in PPTP helper As noted by Alexey Dobriyan, the DEBUGP statement prints the wrong callID. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 79db5b70d5f6..117587978700 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -485,7 +485,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, if (info->pns_call_id != ntohs(*pcid)) { DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*cid)); + pptp_msg_name[msg], ntohs(*pcid)); break; } -- cgit v1.2.3 From 67497205b12e3cb408259cc09b50c3a9d12cd935 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 22 Sep 2005 23:45:24 -0700 Subject: [NETFILTER] Fix sparse endian warnings in pptp helper Signed-off-by: Alexey Dobriyan Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 118 +++++++++++----------- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 6 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 14 +-- 3 files changed, 70 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 389e3851d52f..50a761db5a04 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -60,8 +60,8 @@ struct ip_ct_pptp_expect { struct pptp_pkt_hdr { __u16 packetLength; - __u16 packetType; - __u32 magicCookie; + __be16 packetType; + __be32 magicCookie; }; /* PptpControlMessageType values */ @@ -93,7 +93,7 @@ struct pptp_pkt_hdr { #define PPTP_REMOVE_DEVICE_ERROR 6 struct PptpControlHeader { - __u16 messageType; + __be16 messageType; __u16 reserved; }; @@ -106,13 +106,13 @@ struct PptpControlHeader { #define PPTP_BEARER_CAP_DIGITAL 0x2 struct PptpStartSessionRequest { - __u16 protocolVersion; + __be16 protocolVersion; __u8 reserved1; __u8 reserved2; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -125,13 +125,13 @@ struct PptpStartSessionRequest { #define PPTP_START_UNKNOWN_PROTOCOL 5 struct PptpStartSessionReply { - __u16 protocolVersion; + __be16 protocolVersion; __u8 resultCode; __u8 generalErrorCode; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -155,7 +155,7 @@ struct PptpStopSessionReply { }; struct PptpEchoRequest { - __u32 identNumber; + __be32 identNumber; }; /* PptpEchoReplyResultCode */ @@ -163,7 +163,7 @@ struct PptpEchoRequest { #define PPTP_ECHO_GENERAL_ERROR 2 struct PptpEchoReply { - __u32 identNumber; + __be32 identNumber; __u8 resultCode; __u8 generalErrorCode; __u16 reserved; @@ -180,16 +180,16 @@ struct PptpEchoReply { #define PPTP_DONT_CARE_BEARER_TYPE 3 struct PptpOutCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 minBPS; - __u32 maxBPS; - __u32 bearerType; - __u32 framingType; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 callID; + __be16 callSerialNumber; + __be32 minBPS; + __be32 maxBPS; + __be32 bearerType; + __be32 framingType; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved1; - __u16 phoneNumberLength; + __be16 phoneNumberLength; __u16 reserved2; __u8 phoneNumber[64]; __u8 subAddress[64]; @@ -205,24 +205,24 @@ struct PptpOutCallRequest { #define PPTP_OUTCALL_DONT_ACCEPT 7 struct PptpOutCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 physChannelID; + __be16 causeCode; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 physChannelID; }; struct PptpInCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 callBearerType; - __u32 physChannelID; - __u16 dialedNumberLength; - __u16 dialingNumberLength; + __be16 callID; + __be16 callSerialNumber; + __be32 callBearerType; + __be32 physChannelID; + __be16 dialedNumberLength; + __be16 dialingNumberLength; __u8 dialedNumber[64]; __u8 dialingNumber[64]; __u8 subAddress[64]; @@ -234,54 +234,54 @@ struct PptpInCallRequest { #define PPTP_INCALL_DONT_ACCEPT 3 struct PptpInCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved; }; struct PptpInCallConnected { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 callFramingType; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 callFramingType; }; struct PptpClearCallRequest { - __u16 callID; + __be16 callID; __u16 reserved; }; struct PptpCallDisconnectNotify { - __u16 callID; + __be16 callID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; + __be16 causeCode; __u16 reserved; __u8 callStatistics[128]; }; struct PptpWanErrorNotify { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 crcErrors; - __u32 framingErrors; - __u32 hardwareOverRuns; - __u32 bufferOverRuns; - __u32 timeoutErrors; - __u32 alignmentErrors; + __be32 crcErrors; + __be32 framingErrors; + __be32 hardwareOverRuns; + __be32 bufferOverRuns; + __be32 timeoutErrors; + __be32 alignmentErrors; }; struct PptpSetLinkInfo { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 sendAccm; - __u32 recvAccm; + __be32 sendAccm; + __be32 recvAccm; }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 14dc0f7b6556..20e43f018b7c 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -17,7 +17,7 @@ union ip_conntrack_manip_proto u_int16_t all; struct { - u_int16_t port; + __be16 port; } tcp; struct { u_int16_t port; @@ -29,7 +29,7 @@ union ip_conntrack_manip_proto u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, pptp only uses 16 */ + __be16 key; /* key is 32bit, pptp only uses 16 */ } gre; }; @@ -65,7 +65,7 @@ struct ip_conntrack_tuple u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, + __be16 key; /* key is 32bit, * pptp only uses 16 */ } gre; } u; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 117587978700..8236ee0fb090 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -223,8 +223,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) static inline int exp_gre(struct ip_conntrack *master, u_int32_t seq, - u_int16_t callid, - u_int16_t peer_callid) + __be16 callid, + __be16 peer_callid) { struct ip_conntrack_tuple inv_tuple; struct ip_conntrack_tuple exp_tuples[] = { @@ -263,7 +263,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; exp_orig->mask.dst.u.all = 0; - exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; @@ -340,7 +340,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); @@ -551,7 +552,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) @@ -755,7 +757,7 @@ static struct ip_conntrack_helper pptp = { } }, .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xffff } } + .u = { .tcp = { .port = __constant_htons(0xffff) } } }, .dst = { .ip = 0, .u = { .all = 0 }, -- cgit v1.2.3 From 1dfbab59498d6f227c91988bab6c71af049a5333 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:46:57 -0700 Subject: [NETFILTER] Fix conntrack event cache deadlock/oops This patch fixes a number of bugs. It cannot be reasonably split up in multiple fixes, since all bugs interact with each other and affect the same function: Bug #1: The event cache code cannot be called while a lock is held. Therefore, the call to ip_conntrack_event_cache() within ip_ct_refresh_acct() needs to be moved outside of the locked section. This fixes a number of 2.6.14-rcX oops and deadlock reports. Bug #2: We used to call ct_add_counters() for unconfirmed connections without holding a lock. Since the add operations are not atomic, we could race with another CPU. Bug #3: ip_ct_refresh_acct() lost REFRESH events in some cases where refresh (and the corresponding event) are desired, but no accounting shall be performed. Both, evenst and accounting implicitly depended on the skb parameter bein non-null. We now re-introduce a non-accounting "ip_ct_refresh()" variant to explicitly state the desired behaviour. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 25 +++++++++++--- net/ipv4/netfilter/ip_conntrack_amanda.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 49 ++++++++++++++------------- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 1 - net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- 6 files changed, 49 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index bace72a76cc4..4ced38736813 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -332,11 +332,28 @@ extern void need_ip_conntrack(void); extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig); +extern void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); +} + /* Refresh conntrack for this many jiffies */ -extern void ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - unsigned long extra_jiffies); +static inline void ip_ct_refresh(struct ip_conntrack *ct, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); +} /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004bc..fa3f914117ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index c1f82e0c81cf..ea65dd3e517a 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1112,45 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int do_event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + do_event = 1; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + do_event = 1; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (do_event) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 8236ee0fb090..926a6684643d 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) DEBUGP("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - /* refresh_acct will not modify counters if skb == NULL */ if (del_timer(&sibling->timeout)) sibling->timeout.function((unsigned long)sibling); ip_conntrack_put(sibling); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d0..577bac22dcc6 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index d3c7808010ec..dd476b191f4b 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -- cgit v1.2.3 From eb0e0076878a4f9e8e6e7e524ded0d6f7d4a6130 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 22 Sep 2005 23:48:38 -0700 Subject: [SCTP]: Fix SCTP_SHUTDOWN notifications. Fix to allow SCTP_SHUTDOWN notifications to be received on 1-1 style SCTP SOCK_STREAM sockets. Add SCTP_SHUTDOWN notification to the receive queue before updating the state of the association. Signed-off-by: Sridhar Samudrala Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 86073df418f5..505c7de10c50 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2414,6 +2414,17 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. + */ + ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); + if (!ev) { + disposition = SCTP_DISPOSITION_NOMEM; + goto out; + } + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + /* Upon the reception of the SHUTDOWN, the peer endpoint shall * - enter the SHUTDOWN-RECEIVED state, * - stop accepting new data from its SCTP user @@ -2439,17 +2450,6 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack)); - /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. - */ - ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); - if (!ev) { - disposition = SCTP_DISPOSITION_NOMEM; - goto out; - } - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - out: return disposition; } -- cgit v1.2.3 From d67b24c40f81823abe5c1eb808bba1038969142b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Sep 2005 16:52:03 -0700 Subject: [NETFILTER]: Fix ip[6]t_NFQUEUE Kconfig dependency We have to introduce a separate Kconfig menu entry for the NFQUEUE targets. They cannot "just" depend on nfnetlink_queue, since nfnetlink_queue could be linked into the kernel, whereas iptables can be a module. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 11 +++++++++++ net/ipv4/netfilter/Makefile | 2 +- net/ipv6/netfilter/Kconfig | 11 +++++++++++ net/ipv6/netfilter/Makefile | 2 +- 4 files changed, 24 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3cf9b451675c..2cd7e7d1ac90 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -537,6 +537,17 @@ config IP_NF_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_NFQUEUE + tristate "NFQUEUE Target Support" + depends on IP_NF_IPTABLES + help + This Target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + # NAT + specific targets config IP_NF_NAT tristate "Full NAT" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3d45d3c0283c..89002533f2a2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o @@ -101,4 +102,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o -obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 216fbe1ac65c..bb7ccfe33f23 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,6 +209,17 @@ config IP6_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NFQUEUE + tristate "NFQUEUE Target Support" + depends on IP_NF_IPTABLES + help + This Target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index bd9a16a5cbba..2b2c370e8b1c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -21,9 +21,9 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o +obj-$(CONFIG_IP6_NF_TARGET_NFQUEUE) += ip6t_NFQUEUE.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o -obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o -- cgit v1.2.3 From 45fc3b11f1d419ed6c636e5ca84472d9805f520e Mon Sep 17 00:00:00 2001 From: Amos Waterland Date: Sat, 24 Sep 2005 16:53:16 -0700 Subject: [NET]: Protect neigh_stat_seq_fops by CONFIG_PROC_FS From: Amos Waterland If CONFIG_PROC_FS is not selected, the compiler emits this warning: net/core/neighbour.c:64: warning: `neigh_stat_seq_fops' defined but not used Which is correct, because neigh_stat_seq_fops is in fact only initialized and used by code that is protected by CONFIG_PROC_FS. So this patch fixes that up. Signed-off-by: Amos Waterland Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 39fc55edf691..5f160082aafc 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -61,7 +61,9 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); static struct neigh_table *neigh_tables; +#ifdef CONFIG_PROC_FS static struct file_operations neigh_stat_seq_fops; +#endif /* Neighbour hash table buckets are protected with rwlock tbl->lock. -- cgit v1.2.3 From 15166fadb0308496bbff50c08ed5fe6a18d5cc4f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 24 Sep 2005 16:54:50 -0700 Subject: [IRDA]: Fix memory leak in irttp_init() Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/irda/irttp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 6602d901f8b1..1df6487609e1 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -100,6 +100,7 @@ int __init irttp_init(void) if (!irttp->tsaps) { IRDA_ERROR("%s: can't allocate IrTTP hashbin!\n", __FUNCTION__); + kfree(irttp); return -ENOMEM; } -- cgit v1.2.3 From 8689c07e47e928f8e329f667df8cf697a37425dd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 24 Sep 2005 16:55:17 -0700 Subject: [IRDA]: *irttp cleanup * Remove useless comment. * Remove useless assertions. * Remove useless comparison. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/irda/irttp.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 1df6487609e1..8aff254cb418 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -38,7 +38,7 @@ #include #include -static struct irttp_cb *irttp = NULL; +static struct irttp_cb *irttp; static void __irttp_close_tsap(struct tsap_cb *self); @@ -86,12 +86,9 @@ static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; */ int __init irttp_init(void) { - /* Initialize the irttp structure. */ - if (irttp == NULL) { - irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL); - if (irttp == NULL) - return -ENOMEM; - } + irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL); + if (irttp == NULL) + return -ENOMEM; memset(irttp, 0, sizeof(struct irttp_cb)); irttp->magic = TTP_MAGIC; @@ -116,7 +113,6 @@ int __init irttp_init(void) void __exit irttp_cleanup(void) { /* Check for main structure */ - IRDA_ASSERT(irttp != NULL, return;); IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;); /* @@ -383,7 +379,6 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) struct lsap_cb *lsap; notify_t ttp_notify; - IRDA_ASSERT(irttp != NULL, return NULL;); IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;); /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to @@ -1881,8 +1876,6 @@ static int irttp_seq_open(struct inode *inode, struct file *file) struct seq_file *seq; int rc = -ENOMEM; struct irttp_iter_state *s; - - IRDA_ASSERT(irttp != NULL, return -EINVAL;); s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) -- cgit v1.2.3 From 8ddec7460d2f5db3ac35812c03676b1473d1d668 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 24 Sep 2005 16:56:08 -0700 Subject: [NETFILTER] ip_conntrack: Update event cache when status changes The GRE, SCTP and TCP protocol helpers did not call ip_conntrack_event_cache() when updating ct->status. This patch adds the respective calls. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index de3cb9db6f85..744abb9d377a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -247,6 +247,7 @@ static int gre_packet(struct ip_conntrack *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index a875f35e576d..59a4a0111dd3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack, && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { DEBUGP("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 1985abc59d24..121760d6cc50 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -1014,7 +1014,8 @@ static int tcp_packet(struct ip_conntrack *conntrack, /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); -- cgit v1.2.3 From b9d717a7b413f227ebb2d61d9c118335f7292137 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 26 Sep 2005 14:28:02 -0700 Subject: [NET]: Make sure ctl buffer is aligned properly in sys_sendmsg(). It's on the stack and declared as "unsigned char[]", but pointers and similar can be in here thus we need to give it an explicit alignment attribute. Signed-off-by: Alex Williamson Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index f9264472377f..dbd1a6851edd 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1700,7 +1700,9 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) struct socket *sock; char address[MAX_SOCK_ADDR]; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; - unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */ + unsigned char ctl[sizeof(struct cmsghdr) + 20] + __attribute__ ((aligned (sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; -- cgit v1.2.3 From c62dba9011b93fd88fde929848582b2a98309878 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 26 Sep 2005 15:10:16 -0700 Subject: [IPV6]: Fix [Bug 5306] Oops on IPv6 route lookup > Steps to reproduce: > 1. Boot Linux, do NOT setup any IPv6 routes > 2. ip route get 2001::1 (or any unroutable address) Well caught. We never set rt6i_idev on ip6_null_entry. This patch should make the problem go away. Signed-off-by: Herbert Xu Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fea3f4402a0..4e509e52fbc1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3520,6 +3520,8 @@ int __init addrconf_init(void) if (err) return err; + ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + register_netdevice_notifier(&ipv6_dev_notf); #ifdef CONFIG_IPV6_PRIVACY -- cgit v1.2.3 From b85daee0e497c8fe7c4dc3531674ede645b37cdf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 26 Sep 2005 15:23:58 -0700 Subject: [AF_PACKET]: Remove bogus checks added to packet_sendmsg(). These broke existing apps, and the checks are superfluous as the values being verified aren't even used. Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ee865d88183b..6a67a87384cc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -761,12 +761,6 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (dev->hard_header) { int res; err = -EINVAL; - if (saddr) { - if (saddr->sll_halen != dev->addr_len) - goto out_free; - if (saddr->sll_hatype != dev->type) - goto out_free; - } res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (sock->type != SOCK_DGRAM) { skb->tail = skb->data; -- cgit v1.2.3 From 188bab3ae0ed164bc18f98be932512d777dd038b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 26 Sep 2005 15:25:11 -0700 Subject: [NETFILTER]: Fix invalid module autoloading by splitting iptable_nat When you've enabled conntrack and NAT as a module (standard case in all distributions), and you've also enabled the new conntrack netlink interface, loading ip_conntrack_netlink.ko will auto-load iptable_nat.ko. This causes a huge performance penalty, since for every packet you iterate the nat code, even if you don't want it. This patch splits iptable_nat.ko into the NAT core (ip_nat.ko) and the iptables frontend (iptable_nat.ko). Threfore, ip_conntrack_netlink.ko will only pull ip_nat.ko, but not the frontend. ip_nat.ko will "only" allocate some resources, but not affect runtime performance. This separation is also a nice step in anticipation of new packet filters (nf-hipac, ipset, pkttables) being able to use the NAT core. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_core.h | 12 +++++----- net/ipv4/netfilter/Makefile | 5 +++-- net/ipv4/netfilter/ip_nat_core.c | 35 ++++++++++++++++++++---------- net/ipv4/netfilter/ip_nat_helper.c | 4 ++++ net/ipv4/netfilter/ip_nat_standalone.c | 25 ++++----------------- 5 files changed, 40 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 3b50eb91f007..30db23f06b03 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -5,16 +5,14 @@ /* This header used to share core functionality between the standalone NAT module, and the compatibility layer's use of NAT for masquerading. */ -extern int ip_nat_init(void); -extern void ip_nat_cleanup(void); -extern unsigned int nat_packet(struct ip_conntrack *ct, +extern unsigned int ip_nat_packet(struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo, unsigned int hooknum, struct sk_buff **pskb); -extern int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 89002533f2a2..dab4b58dd31e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,7 +4,8 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o @@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index c3ea891d38e7..c5e3abd24672 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -74,12 +74,14 @@ ip_nat_proto_find_get(u_int8_t protonum) return p; } +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); void ip_nat_proto_put(struct ip_nat_protocol *p) { module_put(p->me); } +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -111,6 +113,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -127,6 +130,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -347,6 +351,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -387,10 +392,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -417,12 +422,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -509,6 +515,7 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) @@ -525,6 +532,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto) write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) @@ -536,6 +544,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) @@ -582,7 +591,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif -int __init ip_nat_init(void) +static int __init ip_nat_init(void) { size_t i; @@ -624,10 +633,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index d2dd5d313556..5d506e0564d5 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, } return 1; } +EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX @@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_mangle_udp_packet); /* Adjust one found SACK option including checksum correction */ static void @@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_seq_adjust); /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ @@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct, /* hook doesn't matter, but it has to do destination manip */ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } +EXPORT_SYMBOL(ip_nat_follow_master); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0ff368b131f6..30cd4e18c129 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, + CTINFO2DIR(ctinfo))) return NF_DROP; else return NF_ACCEPT; @@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum, } IP_NF_ASSERT(info); - return nat_packet(ct, ctinfo, hooknum, pskb); + return ip_nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int @@ -325,15 +325,10 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't setup rules.\n"); goto cleanup_nothing; } - ret = ip_nat_init(); - if (ret < 0) { - printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_rule_init; - } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { printk("ip_nat_init: can't register in hook.\n"); - goto cleanup_nat; + goto cleanup_rule_init; } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { @@ -374,8 +369,6 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_out_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); - cleanup_nat: - ip_nat_cleanup(); cleanup_rule_init: ip_nat_rule_cleanup(); cleanup_nothing: @@ -395,14 +388,4 @@ static void __exit fini(void) module_init(init); module_exit(fini); -EXPORT_SYMBOL(ip_nat_setup_info); -EXPORT_SYMBOL(ip_nat_protocol_register); -EXPORT_SYMBOL(ip_nat_protocol_unregister); -EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); -EXPORT_SYMBOL_GPL(ip_nat_proto_put); -EXPORT_SYMBOL(ip_nat_cheat_check); -EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); -EXPORT_SYMBOL(ip_nat_mangle_udp_packet); -EXPORT_SYMBOL(ip_nat_used_tuple); -EXPORT_SYMBOL(ip_nat_follow_master); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 667347f1ca7e099f6833551f194cf2bcc778871b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 12:07:44 -0700 Subject: [NEIGH]: Add debugging check when adding timers. If we double-add a neighbour entry timer, which should be impossible but has been reported, dump the current state of the entry so that we can debug this. Signed-off-by: David S. Miller --- net/core/neighbour.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5f160082aafc..4128fc76ac3a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -727,6 +727,13 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } +static inline void neigh_add_timer(struct neighbour *n, unsigned long when) +{ + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); + } +} /* Called when a timer expires for a neighbour entry. */ @@ -811,8 +818,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); if (time_before(next, jiffies + HZ/2)) next = jiffies + HZ/2; - neigh->timer.expires = next; - add_timer(&neigh->timer); + neigh_add_timer(neigh, next); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { struct sk_buff *skb = skb_peek(&neigh->arp_queue); @@ -854,8 +860,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh_hold(neigh); - neigh->timer.expires = now + 1; - add_timer(&neigh->timer); + neigh_add_timer(neigh, now + 1); } else { neigh->nud_state = NUD_FAILED; write_unlock_bh(&neigh->lock); @@ -868,8 +873,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); neigh_hold(neigh); neigh->nud_state = NUD_DELAY; - neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; - add_timer(&neigh->timer); + neigh_add_timer(neigh, + jiffies + neigh->parms->delay_probe_time); } if (neigh->nud_state == NUD_INCOMPLETE) { @@ -1015,10 +1020,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_del_timer(neigh); if (new & NUD_IN_TIMER) { neigh_hold(neigh); - neigh->timer.expires = jiffies + + neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? - neigh->parms->reachable_time : 0); - add_timer(&neigh->timer); + neigh->parms->reachable_time : + 0))); } neigh->nud_state = new; } -- cgit v1.2.3 From bc8dfcb93970ad7139c976356bfc99d7e251deaf Mon Sep 17 00:00:00 2001 From: Daniel Phillips Date: Tue, 27 Sep 2005 15:22:35 -0700 Subject: [NET]: Use non-recursive algorithm in skb_copy_datagram_iovec() Use iteration instead of recursion. Fraglists within fraglists should never occur, so we BUG check this. Signed-off-by: Daniel Phillips Signed-off-by: David S. Miller --- net/core/datagram.c | 81 +++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index da9bf71421a7..81987df536eb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -211,74 +211,45 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to, int len) { - int start = skb_headlen(skb); - int i, copy = start - offset; - - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - if (memcpy_toiovec(to, skb->data + offset, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } + int i, err, fraglen, end = 0; + struct sk_buff *next = skb_shinfo(skb)->frag_list; +next_skb: + fraglen = skb_headlen(skb); + i = -1; - /* Copy paged appendix. Hmm... why does this look so complicated? */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); + while (1) { + int start = end; - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + if ((end += fraglen) > offset) { + int copy = end - offset, o = offset - start; if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_toiovec(to, vaddr + frag->page_offset + - offset - start, copy); - kunmap(page); + if (i == -1) + err = memcpy_toiovec(to, skb->data + o, copy); + else { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + void *p = kmap(page) + frag->page_offset + o; + err = memcpy_toiovec(to, p, copy); + kunmap(page); + } if (err) goto fault; if (!(len -= copy)) return 0; offset += copy; } - start = end; + if (++i >= skb_shinfo(skb)->nr_frags) + break; + fraglen = skb_shinfo(skb)->frags[i].size; } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_iovec(list, - offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } + if (next) { + skb = next; + BUG_ON(skb_shinfo(skb)->frag_list); + next = skb->next; + goto next_skb; } - if (!len) - return 0; - fault: return -EFAULT; } -- cgit v1.2.3 From 2d7ceece08ad940d0ceac98ab1b5a3b82dfc2a0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2005 15:22:58 -0700 Subject: [NET]: Prefetch dev->qdisc_lock in dev_queue_xmit() We know the lock is going to be taken. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 37c881070963..9066c874e273 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1259,6 +1259,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; + spin_lock_prefetch(&dev->queue_lock); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ -- cgit v1.2.3 From a79af59efd20990473d579b1d8d70bb120f0920c Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Tue, 27 Sep 2005 15:23:38 -0700 Subject: [NET]: Fix module reference counts for loadable protocol modules I have been experimenting with loadable protocol modules, and ran into several issues with module reference counting. The first issue was that __module_get failed at the BUG_ON check at the top of the routine (checking that my module reference count was not zero) when I created the first socket. When sk_alloc() is called, my module reference count was still 0. When I looked at why sctp didn't have this problem, I discovered that sctp creates a control socket during module init (when the module ref count is not 0), which keeps the reference count non-zero. This section has been updated to address the point Stephen raised about checking the return value of try_module_get(). The next problem arose when my socket init routine returned an error. This resulted in my module reference count being decremented below 0. My socket ops->release routine was also being called. The issue here is that sock_release() calls the ops->release routine and decrements the ref count if sock->ops is not NULL. Since the socket probably didn't get correctly initialized, this should not be done, so we will set sock->ops to NULL because we will not call try_module_get(). While searching for another bug, I also noticed that sys_accept() has a possibility of doing a module_put() when it did not do an __module_get so I re-ordered the call to security_socket_accept(). Signed-off-by: Frank Filz Signed-off-by: David S. Miller --- net/core/sock.c | 20 ++++++++++++-------- net/socket.c | 13 ++++++++----- 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index ac63b56e23b2..928d2a1d6d8e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -660,16 +660,20 @@ struct sock *sk_alloc(int family, unsigned int __nocast priority, sock_lock_init(sk); } - if (security_sk_alloc(sk, family, priority)) { - if (slab != NULL) - kmem_cache_free(slab, sk); - else - kfree(sk); - sk = NULL; - } else - __module_get(prot->owner); + if (security_sk_alloc(sk, family, priority)) + goto out_free; + + if (!try_module_get(prot->owner)) + goto out_free; } return sk; + +out_free: + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); + return NULL; } void sk_free(struct sock *sk) diff --git a/net/socket.c b/net/socket.c index dbd1a6851edd..3145103cdf54 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1145,8 +1145,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res if (!try_module_get(net_families[family]->owner)) goto out_release; - if ((err = net_families[family]->create(sock, protocol)) < 0) + if ((err = net_families[family]->create(sock, protocol)) < 0) { + sock->ops = NULL; goto out_module_put; + } + /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. @@ -1360,16 +1363,16 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ newsock->type = sock->type; newsock->ops = sock->ops; - err = security_socket_accept(sock, newsock); - if (err) - goto out_release; - /* * We don't need try_module_get here, as the listening socket (sock) * has the protocol module (sock->ops->owner) held. */ __module_get(newsock->ops->owner); + err = security_socket_accept(sock, newsock); + if (err) + goto out_release; + err = sock->ops->accept(sock, newsock, sock->file->f_flags); if (err < 0) goto out_release; -- cgit v1.2.3 From c3c4ed652e8f2c348ebdd3f2e45664a0e238ee52 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 27 Sep 2005 15:42:58 -0700 Subject: [ROSE]: do proto_unregister() on exit paths Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/rose/af_rose.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5acb1680524a..a190eaee702e 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1481,12 +1481,14 @@ static int __init rose_proto_init(void) if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); + proto_unregister(&rose_proto); return -1; } dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); + proto_unregister(&rose_proto); return -1; } -- cgit v1.2.3 From 70ff3b66d79c5110e533f3f2aea1a5b2fc5f8d90 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 27 Sep 2005 15:43:46 -0700 Subject: [ROSE]: return sane -E* from rose_proto_init() Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/rose/af_rose.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index a190eaee702e..dbfb0e3dc798 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1482,14 +1482,14 @@ static int __init rose_proto_init(void) if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); proto_unregister(&rose_proto); - return -1; + return -EINVAL; } dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); proto_unregister(&rose_proto); - return -1; + return -ENOMEM; } memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*)); @@ -1502,9 +1502,11 @@ static int __init rose_proto_init(void) name, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); + rc = -ENOMEM; goto fail; } - if (register_netdev(dev)) { + rc = register_netdev(dev); + if (rc) { printk(KERN_ERR "ROSE: netdevice regeistration failed\n"); free_netdev(dev); goto fail; @@ -1539,7 +1541,7 @@ fail: } kfree(dev_rose); proto_unregister(&rose_proto); - return -ENOMEM; + goto out; } module_init(rose_proto_init); -- cgit v1.2.3 From a83cd2cc90bd9390cf03cd40bba204d9ed520633 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 27 Sep 2005 15:44:36 -0700 Subject: [ROSE]: check rose_ndevs earlier * Don't bother with proto registering if rose_ndevs is bad. * Make escape structure more coherent. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/rose/af_rose.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index dbfb0e3dc798..0f96565a64ec 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1472,24 +1472,25 @@ static const char banner[] = KERN_INFO "F6FBB/G4KLX ROSE for Linux. Version 0.62 static int __init rose_proto_init(void) { int i; - int rc = proto_register(&rose_proto, 0); + int rc; + if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { + printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); + rc = -EINVAL; + goto out; + } + + rc = proto_register(&rose_proto, 0); if (rc != 0) goto out; rose_callsign = null_ax25_address; - if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { - printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); - proto_unregister(&rose_proto); - return -EINVAL; - } - dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); - proto_unregister(&rose_proto); - return -ENOMEM; + rc = -ENOMEM; + goto out_proto_unregister; } memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*)); @@ -1540,6 +1541,7 @@ fail: free_netdev(dev_rose[i]); } kfree(dev_rose); +out_proto_unregister: proto_unregister(&rose_proto); goto out; } -- cgit v1.2.3 From 520d1b830a93086c1f9e969d98f7ef01f0356493 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 27 Sep 2005 15:45:15 -0700 Subject: [ROSE]: fix typo (regeistration) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/rose/af_rose.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0f96565a64ec..829fdbc4400b 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1508,7 +1508,7 @@ static int __init rose_proto_init(void) } rc = register_netdev(dev); if (rc) { - printk(KERN_ERR "ROSE: netdevice regeistration failed\n"); + printk(KERN_ERR "ROSE: netdevice registration failed\n"); free_netdev(dev); goto fail; } -- cgit v1.2.3 From ba645c16026ed733a51f904df34756f61bc155fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 16:03:05 -0700 Subject: [NET]: Slightly optimize ethernet address comparison. We know the thing is at least 2-byte aligned, so take advantage of that instead of invoking memcmp() which results in truly horrifically inefficient code because it can't assume anything about alignment. Signed-off-by: David S. Miller --- net/ethernet/eth.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 87a052a9a84f..8b299cc82060 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -146,6 +146,19 @@ int eth_rebuild_header(struct sk_buff *skb) return 0; } +static inline unsigned int compare_eth_addr(const unsigned char *__a, const unsigned char *__b) +{ + const unsigned short *dest = (unsigned short *) __a; + const unsigned short *devaddr = (unsigned short *) __b; + unsigned int res; + + BUILD_BUG_ON(ETH_ALEN != 6); + res = ((dest[0] ^ devaddr[0]) | + (dest[1] ^ devaddr[1]) | + (dest[2] ^ devaddr[2])) != 0; + + return res; +} /* * Determine the packet's protocol ID. The rule here is that we @@ -158,16 +171,15 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw=skb->data; + skb->mac.raw = skb->data; skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - if(*eth->h_dest&1) - { - if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) - skb->pkt_type=PACKET_BROADCAST; + if (*eth->h_dest&1) { + if (!compare_eth_addr(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; else - skb->pkt_type=PACKET_MULTICAST; + skb->pkt_type = PACKET_MULTICAST; } /* @@ -178,10 +190,9 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * seems to set IFF_PROMISC. */ - else if(1 /*dev->flags&IFF_PROMISC*/) - { - if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) - skb->pkt_type=PACKET_OTHERHOST; + else if(1 /*dev->flags&IFF_PROMISC*/) { + if (unlikely(!compare_eth_addr(eth->h_dest, dev->dev_addr))) + skb->pkt_type = PACKET_OTHERHOST; } if (ntohs(eth->h_proto) >= 1536) -- cgit v1.2.3 From 64233bffbb50f12e576c61d1698a573c8033004a Mon Sep 17 00:00:00 2001 From: Oliver Dawid Date: Tue, 27 Sep 2005 16:11:29 -0700 Subject: [APPLETALK]: Fix broadcast bug. From: Oliver Dawid we found a bug in net/appletalk/ddp.c concerning broadcast packets. In kernel 2.4 it was working fine. The bug first occured 4 years ago when switching to new SNAP layer handling. This bug can be splitted up into a sending(1) and reception(2) problem: Sending(1) In kernel 2.4 broadcast packets were sent to a matching ethernet device and atalk_rcv() was called to receive it as "loopback" (so loopback packets were shortcutted and handled in DDP layer). When switching to the new SNAP structure, this shortcut was removed and the loopback packet was send to SNAP layer. The author forgot to replace the remote device pointer by the loopback device pointer before sending the packet to SNAP layer (by calling ddp_dl->request() ) therfor the packet was not sent back by underlying layers to ddp's atalk_rcv(). Reception(2) In atalk_rcv() a packet received by this loopback mechanism contains now the (rigth) loopback device pointer (in Kernel 2.4 it was the (wrong) remote ethernet device pointer) and therefor no matching socket will be found to deliver this packet to. Because a broadcast packet should be send to the first matching socket (as it is done in many other protocols (?)), we removed the network comparison in broadcast case. Below you will find a patch to correct this bug. Its diffed to kernel 2.6.14-rc1 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 1d31b3a3f1e5..7982656b9c83 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -100,8 +100,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, continue; if (to->sat_addr.s_net == ATADDR_ANYNET && - to->sat_addr.s_node == ATADDR_BCAST && - at->src_net == atif->address.s_net) + to->sat_addr.s_node == ATADDR_BCAST) goto found; if (to->sat_addr.s_net == at->src_net && @@ -1443,8 +1442,10 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, else atif = atalk_find_interface(ddp->deh_dnet, ddp->deh_dnode); - /* Not ours, so we route the packet via the correct AppleTalk iface */ if (!atif) { + /* Not ours, so we route the packet via the correct + * AppleTalk iface + */ atalk_route_packet(skb, dev, ddp, &ddphv, origlen); goto out; } @@ -1592,9 +1593,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (usat->sat_addr.s_net || usat->sat_addr.s_node == ATADDR_ANYNODE) { rt = atrtr_find(&usat->sat_addr); - if (!rt) - return -ENETUNREACH; - dev = rt->dev; } else { struct atalk_addr at_hint; @@ -1603,11 +1601,12 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr at_hint.s_net = at->src_net; rt = atrtr_find(&at_hint); - if (!rt) - return -ENETUNREACH; - dev = rt->dev; } + if (!rt) + return -ENETUNREACH; + + dev = rt->dev; SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", sk, size, dev->name); @@ -1677,6 +1676,20 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk); /* loop back */ skb_orphan(skb); + if (ddp->deh_dnode == ATADDR_BCAST) { + struct atalk_addr at_lo; + + at_lo.s_node = 0; + at_lo.s_net = 0; + + rt = atrtr_find(&at_lo); + if (!rt) { + kfree_skb(skb); + return -ENETUNREACH; + } + dev = rt->dev; + skb->dev = dev; + } ddp_dl->request(ddp_dl, skb, dev->dev_addr); } else { SOCK_DEBUG(sk, "SK %p: send out.\n", sk); -- cgit v1.2.3 From 6b251858d377196b8cea20e65cae60f584a42735 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 16:31:48 -0700 Subject: [TCP]: Fix init_cwnd calculations in tcp_select_initial_window() Match it up to what RFC2414 really specifies. Noticed by Rick Jones. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d6e3d269e906..caf2e2cff293 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -190,15 +190,16 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set initial window to value enough for senders, - * following RFC1414. Senders, not following this RFC, + * following RFC2414. Senders, not following this RFC, * will be satisfied with 2. */ if (mss > (1<<*rcv_wscale)) { - int init_cwnd = 4; - if (mss > 1460*3) + int init_cwnd; + + if (mss > 1460) init_cwnd = 2; - else if (mss > 1460) - init_cwnd = 3; + else + init_cwnd = (mss > 1095) ? 3 : 4; if (*rcv_wnd > init_cwnd*mss) *rcv_wnd = init_cwnd*mss; } -- cgit v1.2.3 From e2c4b72158a9f1286df41dee478e774f1b94e93a Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Wed, 28 Sep 2005 16:34:24 -0700 Subject: [ATM]: net/atm/ioctl.c: autoload pppoatm and br2684 Signed-off-by: Roman Kagan Signed-off-by: Chas Williams --- net/atm/ioctl.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index d89056ec44d4..a150198b05a3 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -105,17 +105,35 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (!error) sock->state = SS_CONNECTED; goto done; - default: + case ATM_SETBACKEND: + case ATM_NEWBACKENDIF: + { + atm_backend_t backend; + error = get_user(backend, (atm_backend_t __user *) argp); + if (error) + goto done; + switch (backend) { + case ATM_BACKEND_PPP: + request_module("pppoatm"); + break; + case ATM_BACKEND_BR2684: + request_module("br2684"); + break; + } + } + break; + case ATMMPC_CTRL: + case ATMMPC_DATA: + request_module("mpoa"); + break; + case ATMARPD_CTRL: + request_module("clip"); + break; + case ATMLEC_CTRL: + request_module("lec"); break; } - if (cmd == ATMMPC_CTRL || cmd == ATMMPC_DATA) - request_module("mpoa"); - if (cmd == ATMARPD_CTRL) - request_module("clip"); - if (cmd == ATMLEC_CTRL) - request_module("lec"); - error = -ENOIOCTLCMD; down(&ioctl_mutex); -- cgit v1.2.3 From 9301e320e98ff19a0e48881b038d0c24ca76e6c0 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Wed, 28 Sep 2005 16:35:01 -0700 Subject: [ATM]: track and close listen sockets when sigd exits Signed-off-by: Chas Williams --- net/atm/common.c | 4 ++-- net/atm/signaling.c | 8 ++++---- net/atm/svc.c | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index e93e838069e8..801a5813ec60 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -178,8 +178,6 @@ static void vcc_destroy_socket(struct sock *sk) if (vcc->push) vcc->push(vcc, NULL); /* atmarpd has no push */ - vcc_remove_socket(sk); /* no more receive */ - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc,skb->truesize); kfree_skb(skb); @@ -188,6 +186,8 @@ static void vcc_destroy_socket(struct sock *sk) module_put(vcc->dev->ops->owner); atm_dev_put(vcc->dev); } + + vcc_remove_socket(sk); } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index f7c449ac1800..e7211a7f382c 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,8 +217,9 @@ void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, static void purge_vcc(struct atm_vcc *vcc) { if (sk_atm(vcc)->sk_family == PF_ATMSVC && - !test_bit(ATM_VF_META,&vcc->flags)) { - set_bit(ATM_VF_RELEASED,&vcc->flags); + !test_bit(ATM_VF_META, &vcc->flags)) { + set_bit(ATM_VF_RELEASED, &vcc->flags); + clear_bit(ATM_VF_REGIS, &vcc->flags); vcc_release_async(vcc, -EUNATCH); } } @@ -243,8 +244,7 @@ static void sigd_close(struct atm_vcc *vcc) sk_for_each(s, node, head) { struct atm_vcc *vcc = atm_sk(s); - if (vcc->dev) - purge_vcc(vcc); + purge_vcc(vcc); } } read_unlock(&vcc_sklist_lock); diff --git a/net/atm/svc.c b/net/atm/svc.c index 08e46052a3e4..d7b266136bf6 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -302,6 +302,7 @@ static int svc_listen(struct socket *sock,int backlog) error = -EINVAL; goto out; } + vcc_insert_socket(sk); set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); -- cgit v1.2.3 From 735631a9196db42631b8817892605ee72e13a58b Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Wed, 28 Sep 2005 16:35:22 -0700 Subject: [ATM]: fix bug in atm address list handling From: Martin Whitaker Signed-off-by: Chas Williams --- net/atm/addr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/atm/addr.c b/net/atm/addr.c index 1c8867f7f54a..a30d0bf48063 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -50,8 +50,10 @@ void atm_reset_addr(struct atm_dev *dev) struct atm_dev_addr *this, *p; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry_safe(this, p, &dev->local, entry) - kfree(this); + list_for_each_entry_safe(this, p, &dev->local, entry) { + list_del(&this->entry); + kfree(this); + } spin_unlock_irqrestore(&dev->lock, flags); notify_sigd(dev); } -- cgit v1.2.3 From 01d40f28b125e0a9aa0ec24642be67fc4c5dfaff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 22:37:53 -0700 Subject: [NET]: Fix reversed logic in eth_type_trans(). I got the second compare_eth_addr() test reversed, oops. Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 8b299cc82060..68a5ca866442 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -191,7 +191,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) */ else if(1 /*dev->flags&IFF_PROMISC*/) { - if (unlikely(!compare_eth_addr(eth->h_dest, dev->dev_addr))) + if (unlikely(compare_eth_addr(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } -- cgit v1.2.3 From 666002218d59db271e5c1ede1d80227170c51987 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 Sep 2005 22:32:57 +0100 Subject: [PATCH] proc_mkdir() should be used to create procfs directories A bunch of create_proc_dir_entry() calls creating directories had crept in since the last sweep; converted to proc_mkdir(). Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/char/drm/drm_drv.c | 2 +- drivers/char/drm/drm_proc.c | 2 +- drivers/isdn/divert/divert_procfs.c | 6 +++--- drivers/isdn/hardware/eicon/diva_didd.c | 6 ++---- drivers/isdn/hardware/eicon/divasproc.c | 2 +- drivers/isdn/hysdn/hysdn_procconf.c | 2 +- drivers/media/video/cpia.c | 2 +- drivers/net/ibmveth.c | 6 +++--- drivers/net/irda/vlsi_ir.c | 4 ++-- drivers/net/pppoe.c | 4 ++-- drivers/net/sk98lin/skge.c | 8 ++++---- drivers/scsi/sg.c | 3 +-- drivers/usb/media/vicam.c | 4 ++-- net/core/pktgen.c | 23 ++++++----------------- net/ieee80211/ieee80211_module.c | 2 +- 15 files changed, 31 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c index 6ba48f346fcf..041bb47b5c39 100644 --- a/drivers/char/drm/drm_drv.c +++ b/drivers/char/drm/drm_drv.c @@ -376,7 +376,7 @@ static int __init drm_core_init(void) goto err_p2; } - drm_proc_root = create_proc_entry("dri", S_IFDIR, NULL); + drm_proc_root = proc_mkdir("dri", NULL); if (!drm_proc_root) { DRM_ERROR("Cannot create /proc/dri\n"); ret = -1; diff --git a/drivers/char/drm/drm_proc.c b/drivers/char/drm/drm_proc.c index 32d2bb99462c..977961002488 100644 --- a/drivers/char/drm/drm_proc.c +++ b/drivers/char/drm/drm_proc.c @@ -95,7 +95,7 @@ int drm_proc_init(drm_device_t *dev, int minor, char name[64]; sprintf(name, "%d", minor); - *dev_root = create_proc_entry(name, S_IFDIR, root); + *dev_root = proc_mkdir(name, root); if (!*dev_root) { DRM_ERROR("Cannot create /proc/dri/%s\n", name); return -1; diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c index e1f0d87de0eb..0b0ea26023e5 100644 --- a/drivers/isdn/divert/divert_procfs.c +++ b/drivers/isdn/divert/divert_procfs.c @@ -287,12 +287,12 @@ divert_dev_init(void) init_waitqueue_head(&rd_queue); #ifdef CONFIG_PROC_FS - isdn_proc_entry = create_proc_entry("isdn", S_IFDIR | S_IRUGO | S_IXUGO, proc_net); + isdn_proc_entry = proc_mkdir("net/isdn", NULL); if (!isdn_proc_entry) return (-1); isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); if (!isdn_divert_entry) { - remove_proc_entry("isdn", proc_net); + remove_proc_entry("net/isdn", NULL); return (-1); } isdn_divert_entry->proc_fops = &isdn_fops; @@ -312,7 +312,7 @@ divert_dev_deinit(void) #ifdef CONFIG_PROC_FS remove_proc_entry("divert", isdn_proc_entry); - remove_proc_entry("isdn", proc_net); + remove_proc_entry("net/isdn", NULL); #endif /* CONFIG_PROC_FS */ return (0); diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c index 7fdf8ae5be52..27204f4b111a 100644 --- a/drivers/isdn/hardware/eicon/diva_didd.c +++ b/drivers/isdn/hardware/eicon/diva_didd.c @@ -30,8 +30,6 @@ static char *DRIVERNAME = static char *DRIVERLNAME = "divadidd"; char *DRIVERRELEASE_DIDD = "2.0"; -static char *main_proc_dir = "eicon"; - MODULE_DESCRIPTION("DIDD table driver for diva drivers"); MODULE_AUTHOR("Cytronics & Melware, Eicon Networks"); MODULE_SUPPORTED_DEVICE("Eicon diva drivers"); @@ -89,7 +87,7 @@ proc_read(char *page, char **start, off_t off, int count, int *eof, static int DIVA_INIT_FUNCTION create_proc(void) { - proc_net_eicon = create_proc_entry(main_proc_dir, S_IFDIR, proc_net); + proc_net_eicon = proc_mkdir("net/eicon", NULL); if (proc_net_eicon) { if ((proc_didd = @@ -105,7 +103,7 @@ static int DIVA_INIT_FUNCTION create_proc(void) static void DIVA_EXIT_FUNCTION remove_proc(void) { remove_proc_entry(DRIVERLNAME, proc_net_eicon); - remove_proc_entry(main_proc_dir, proc_net); + remove_proc_entry("net/eicon", NULL); } static int DIVA_INIT_FUNCTION divadidd_init(void) diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index b6435589d459..c12efa6f8429 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -381,7 +381,7 @@ int create_adapter_proc(diva_os_xdi_adapter_t * a) char tmp[16]; sprintf(tmp, "%s%d", adapter_dir_name, a->controller); - if (!(de = create_proc_entry(tmp, S_IFDIR, proc_net_eicon))) + if (!(de = proc_mkdir(tmp, proc_net_eicon))) return (0); a->proc_adapter_dir = (void *) de; diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 5da507e532fc..639582f61f41 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -394,7 +394,7 @@ hysdn_procconf_init(void) hysdn_card *card; uchar conf_name[20]; - hysdn_proc_entry = create_proc_entry(PROC_SUBDIR_NAME, S_IFDIR | S_IRUGO | S_IXUGO, proc_net); + hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net); if (!hysdn_proc_entry) { printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n"); return (-1); diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c index 8c08b7f1ad23..b7ec9bf45085 100644 --- a/drivers/media/video/cpia.c +++ b/drivers/media/video/cpia.c @@ -1397,7 +1397,7 @@ static void destroy_proc_cpia_cam(struct cam_data *cam) static void proc_cpia_create(void) { - cpia_proc_root = create_proc_entry("cpia", S_IFDIR, NULL); + cpia_proc_root = proc_mkdir("cpia", NULL); if (cpia_proc_root) cpia_proc_root->owner = THIS_MODULE; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 32d5fabd4b10..a2c4dd4fb221 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -99,7 +99,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance, struct pt_regs static inline void ibmveth_schedule_replenishing(struct ibmveth_adapter*); #ifdef CONFIG_PROC_FS -#define IBMVETH_PROC_DIR "ibmveth" +#define IBMVETH_PROC_DIR "net/ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; #endif @@ -1010,7 +1010,7 @@ static int __devexit ibmveth_remove(struct vio_dev *dev) #ifdef CONFIG_PROC_FS static void ibmveth_proc_register_driver(void) { - ibmveth_proc_dir = create_proc_entry(IBMVETH_PROC_DIR, S_IFDIR, proc_net); + ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL); if (ibmveth_proc_dir) { SET_MODULE_OWNER(ibmveth_proc_dir); } @@ -1018,7 +1018,7 @@ static void ibmveth_proc_register_driver(void) static void ibmveth_proc_unregister_driver(void) { - remove_proc_entry(IBMVETH_PROC_DIR, proc_net); + remove_proc_entry(IBMVETH_PROC_DIR, NULL); } static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index 6d9de626c967..651c5a6578fd 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -1875,11 +1875,11 @@ static int __init vlsi_mod_init(void) sirpulse = !!sirpulse; - /* create_proc_entry returns NULL if !CONFIG_PROC_FS. + /* proc_mkdir returns NULL if !CONFIG_PROC_FS. * Failure to create the procfs entry is handled like running * without procfs - it's not required for the driver to work. */ - vlsi_proc_root = create_proc_entry(PROC_DIR, S_IFDIR, NULL); + vlsi_proc_root = proc_mkdir(PROC_DIR, NULL); if (vlsi_proc_root) { /* protect registered procdir against module removal. * Because we are in the module init path there's no race diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 82f236cc3b9b..a842ecc60a34 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -1070,7 +1070,7 @@ static int __init pppoe_proc_init(void) { struct proc_dir_entry *p; - p = create_proc_entry("pppoe", S_IRUGO, proc_net); + p = create_proc_entry("net/pppoe", S_IRUGO, NULL); if (!p) return -ENOMEM; @@ -1142,7 +1142,7 @@ static void __exit pppoe_exit(void) dev_remove_pack(&pppoes_ptype); dev_remove_pack(&pppoed_ptype); unregister_netdevice_notifier(&pppoe_notifier); - remove_proc_entry("pppoe", proc_net); + remove_proc_entry("net/pppoe", NULL); proto_unregister(&pppoe_sk_proto); } diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index 2e72d79a143c..b18c92cb629e 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -235,7 +235,7 @@ static int SkDrvDeInitAdapter(SK_AC *pAC, int devNbr); * Extern Function Prototypes * ******************************************************************************/ -static const char SKRootName[] = "sk98lin"; +static const char SKRootName[] = "net/sk98lin"; static struct proc_dir_entry *pSkRootDir; extern struct file_operations sk_proc_fops; @@ -5242,20 +5242,20 @@ static int __init skge_init(void) { int error; - pSkRootDir = proc_mkdir(SKRootName, proc_net); + pSkRootDir = proc_mkdir(SKRootName, NULL); if (pSkRootDir) pSkRootDir->owner = THIS_MODULE; error = pci_register_driver(&skge_driver); if (error) - proc_net_remove(SKRootName); + remove_proc_entry(SKRootName, NULL); return error; } static void __exit skge_exit(void) { pci_unregister_driver(&skge_driver); - proc_net_remove(SKRootName); + remove_proc_entry(SKRootName, NULL); } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4d09a6e4dd2e..ad94367df430 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2849,8 +2849,7 @@ sg_proc_init(void) struct proc_dir_entry *pdep; struct sg_proc_leaf * leaf; - sg_proc_sgp = create_proc_entry(sg_proc_sg_dirname, - S_IFDIR | S_IRUGO | S_IXUGO, NULL); + sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL); if (!sg_proc_sgp) return 1; for (k = 0; k < num_leaves; ++k) { diff --git a/drivers/usb/media/vicam.c b/drivers/usb/media/vicam.c index 4a5857c53f11..0bc0b1247a6b 100644 --- a/drivers/usb/media/vicam.c +++ b/drivers/usb/media/vicam.c @@ -1148,7 +1148,7 @@ vicam_write_proc_gain(struct file *file, const char *buffer, static void vicam_create_proc_root(void) { - vicam_proc_root = create_proc_entry("video/vicam", S_IFDIR, 0); + vicam_proc_root = proc_mkdir("video/vicam", NULL); if (vicam_proc_root) vicam_proc_root->owner = THIS_MODULE; @@ -1181,7 +1181,7 @@ vicam_create_proc_entry(struct vicam_camera *cam) sprintf(name, "video%d", cam->vdev.minor); - cam->proc_dir = create_proc_entry(name, S_IFDIR, vicam_proc_root); + cam->proc_dir = proc_mkdir(name, vicam_proc_root); if ( !cam->proc_dir ) return; // FIXME: We should probably return an error here diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ef430b1e8e42..b7f2d65a614f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -186,7 +186,7 @@ /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" +#define PG_PROC_DIR "net/pktgen" #define MAX_CFLOWS 65536 @@ -1476,18 +1476,7 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, static int create_proc_dir(void) { - int len; - /* does proc_dir already exists */ - len = strlen(PG_PROC_DIR); - - for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { - if ((pg_proc_dir->namelen == len) && - (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) - break; - } - - if (!pg_proc_dir) - pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, NULL); if (!pg_proc_dir) return -ENODEV; @@ -1497,7 +1486,7 @@ static int create_proc_dir(void) static int remove_proc_dir(void) { - remove_proc_entry(PG_PROC_DIR, proc_net); + remove_proc_entry(PG_PROC_DIR, NULL); return 0; } @@ -2908,7 +2897,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) pkt_dev->udp_dst_max = 9; strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname); + sprintf(pkt_dev->fname, "%s/%s", PG_PROC_DIR, ifname); if (! pktgen_setup_dev(pkt_dev)) { printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); @@ -2981,7 +2970,7 @@ static int pktgen_create_thread(const char* name, int cpu) spin_lock_init(&t->if_lock); t->cpu = cpu; - sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name); + sprintf(t->fname, "%s/%s", PG_PROC_DIR, t->name); t->proc_ent = create_proc_entry(t->fname, 0600, NULL); if (!t->proc_ent) { printk("pktgen: cannot create %s procfs entry.\n", t->fname); @@ -3064,7 +3053,7 @@ static int __init pg_init(void) create_proc_dir(); - sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR); + sprintf(module_fname, "%s/pgctrl", PG_PROC_DIR); module_proc_ent = create_proc_entry(module_fname, 0600, NULL); if (!module_proc_ent) { printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 03a47343ddc7..6059e9e37123 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -230,7 +230,7 @@ static int __init ieee80211_init(void) struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = create_proc_entry(DRV_NAME, S_IFDIR, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); -- cgit v1.2.3 From 01ff367e62f0474e4d39aa5812cbe2a30d96e1e9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 29 Sep 2005 17:07:20 -0700 Subject: [TCP]: Revert 6b251858d377196b8cea20e65cae60f584a42735 But retain the comment fix. Alexey Kuznetsov has explained the situation as follows: -------------------- I think the fix is incorrect. Look, the RFC function init_cwnd(mss) is not continuous: f.e. for mss=1095 it needs initial window 1095*4, but for mss=1096 it is 1096*3. We do not know exactly what mss sender used for calculations. If we advertised 1096 (and calculate initial window 3*1096), the sender could limit it to some value < 1096 and then it will need window his_mss*4 > 3*1096 to send initial burst. See? So, the honest function for inital rcv_wnd derived from tcp_init_cwnd() is: init_rcv_wnd(mss)= min { init_cwnd(mss1)*mss1 for mss1 <= mss } It is something sort of: if (mss < 1096) return mss*4; if (mss < 1096*2) return 1096*4; return mss*2; (I just scrablled a graph of piece of paper, it is difficult to see or to explain without this) I selected it differently giving more window than it is strictly required. Initial receive window must be large enough to allow sender following to the rfc (or just setting initial cwnd to 2) to send initial burst. But besides that it is arbitrary, so I decided to give slack space of one segment. Actually, the logic was: If mss is low/normal (<=ethernet), set window to receive more than initial burst allowed by rfc under the worst conditions i.e. mss*4. This gives slack space of 1 segment for ethernet frames. For msses slighlty more than ethernet frame, take 3. Try to give slack space of 1 frame again. If mss is huge, force 2*mss. No slack space. Value 1460*3 is really confusing. Minimal one is 1096*2, but besides that it is an arbitrary value. It was meant to be ~4096. 1460*3 is just the magic number from RFC, 1460*3 = 1095*4 is the magic :-), so that I guess hands typed this themselves. -------------------- Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index caf2e2cff293..c5b911f9b662 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -194,12 +194,11 @@ void tcp_select_initial_window(int __space, __u32 mss, * will be satisfied with 2. */ if (mss > (1<<*rcv_wscale)) { - int init_cwnd; - - if (mss > 1460) + int init_cwnd = 4; + if (mss > 1460*3) init_cwnd = 2; - else - init_cwnd = (mss > 1095) ? 3 : 4; + else if (mss > 1460) + init_cwnd = 3; if (*rcv_wnd > init_cwnd*mss) *rcv_wnd = init_cwnd*mss; } -- cgit v1.2.3 From 09e9ec87111ba818d8171262b15ba4c357eb1d27 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 29 Sep 2005 17:17:15 -0700 Subject: [TCP]: Don't over-clamp window in tcp_clamp_window() From: Alexey Kuznetsov Handle better the case where the sender sends full sized frames initially, then moves to a mode where it trickles out small amounts of data at a time. This known problem is even mentioned in the comments above tcp_grow_window() in tcp_input.c, specifically: ... * The scheme does not work when sender sends good segments opening * window and then starts to feed us spagetti. But it should work * in common situations. Otherwise, we have to rely on queue collapsing. ... When the sender gives full sized frames, the "struct sk_buff" overhead from each packet is small. So we'll advertize a larger window. If the sender moves to a mode where small segments are sent, this ratio becomes tilted to the other extreme and we start overrunning the socket buffer space. tcp_clamp_window() tries to address this, but it's clamping of tp->window_clamp is a wee bit too aggressive for this particular case. Fix confirmed by Ion Badulescu. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7537c7bbd06..677419d0c9ad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -355,8 +355,6 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); - if (!ofo_win) - tp->window_clamp = min(tp->window_clamp, app_win); tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); } } -- cgit v1.2.3 From 4a7097fcc431ab2ccf509d8342831873138c814a Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Thu, 29 Sep 2005 17:30:54 -0700 Subject: [ATM]: [lec] attempt to support cisco failover From: Scott Talbert Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/lec.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index a0752487026d..47e1eae97461 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -686,9 +686,19 @@ static unsigned char lec_ctrl_magic[] = { 0x01, 0x01 }; +#define LEC_DATA_DIRECT_8023 2 +#define LEC_DATA_DIRECT_8025 3 + +static int lec_is_data_direct(struct atm_vcc *vcc) +{ + return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) || + (vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025)); +} + static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { + unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = (struct lec_priv *)dev->priv; @@ -728,7 +738,8 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); } else { /* Data frame, queue to protocol handlers */ - unsigned char *dst; + struct lec_arp_table *entry; + unsigned char *src, *dst; atm_return(vcc,skb->truesize); if (*(uint16_t *)skb->data == htons(priv->lecid) || @@ -741,10 +752,30 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } #ifdef CONFIG_TR - if (priv->is_trdev) dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest; + if (priv->is_trdev) + dst = ((struct lecdatahdr_8025 *) skb->data)->h_dest; else #endif - dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; + dst = ((struct lecdatahdr_8023 *) skb->data)->h_dest; + + /* If this is a Data Direct VCC, and the VCC does not match + * the LE_ARP cache entry, delete the LE_ARP cache entry. + */ + spin_lock_irqsave(&priv->lec_arp_lock, flags); + if (lec_is_data_direct(vcc)) { +#ifdef CONFIG_TR + if (priv->is_trdev) + src = ((struct lecdatahdr_8025 *) skb->data)->h_source; + else +#endif + src = ((struct lecdatahdr_8023 *) skb->data)->h_source; + entry = lec_arp_find(priv, src); + if (entry && entry->vcc != vcc) { + lec_arp_remove(priv, entry); + kfree(entry); + } + } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */ !priv->is_proxy && /* Proxy wants all the packets */ -- cgit v1.2.3 From 75b895c15b3ea2a3cd5c8e8f3c62e4598ef4d2ba Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Thu, 29 Sep 2005 17:31:30 -0700 Subject: [ATM]: [lec] reset retry counter when new arp issued From: Scott Talbert Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/lec.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index 47e1eae97461..ad840b9afba8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -2021,6 +2021,12 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, found = entry->vcc; goto out; } + /* If the LE_ARP cache entry is still pending, reset count to 0 + * so another LE_ARP request can be made for this frame. + */ + if (entry->status == ESI_ARP_PENDING) { + entry->no_tries = 0; + } /* Data direct VC not yet set up, check to see if the unknown frame count is greater than the limit. If the limit has not been reached, allow the caller to send packet to -- cgit v1.2.3 From 325ed8239309cb29f10ea58c5a668058ead11479 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 13:57:23 -0700 Subject: [NET]: Fix packet timestamping. I've found the problem in general. It affects any 64-bit architecture. The problem occurs when you change the system time. Suppose that when you boot your system clock is forward by a day. This gets recorded down in skb_tv_base. You then wind the clock back by a day. From that point onwards the offset will be negative which essentially overflows the 32-bit variables they're stored in. In fact, why don't we just store the real time stamp in those 32-bit variables? After all, we're not going to overflow for quite a while yet. When we do overflow, we'll need a better solution of course. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++--------- net/core/skbuff.c | 5 ----- net/ipv4/netfilter/ip_queue.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 4 ++-- net/ipv6/netfilter/ip6_queue.c | 4 ++-- net/netfilter/nfnetlink_log.c | 4 ++-- net/netfilter/nfnetlink_queue.c | 4 ++-- net/packet/af_packet.c | 4 ++-- 8 files changed, 15 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2741c0c55e83..466c879f82b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,8 +155,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -extern struct timeval skb_tv_base; - struct skb_timeval { u32 off_sec; u32 off_usec; @@ -175,7 +173,7 @@ enum { * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @tstamp: Time we arrived stored as offset to skb_tv_base + * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -1255,10 +1253,6 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; - if (skb->tstamp.off_sec) { - stamp->tv_sec += skb_tv_base.tv_sec; - stamp->tv_usec += skb_tv_base.tv_usec; - } } /** @@ -1272,8 +1266,8 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * */ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { - skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; - skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; + skb->tstamp.off_sec = stamp->tv_sec; + skb->tstamp.off_usec = stamp->tv_usec; } extern void __net_timestamp(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f80a28785610..0e9431b59fb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,8 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -struct timeval __read_mostly skb_tv_base; - /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -1708,8 +1706,6 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_fclone_cache) panic("cannot create skbuff cache"); - - do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1743,4 +1739,3 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_tv_base); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d54f14d926f6..36339eb39e17 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index e2c14f3cb2fc..2883ccd8a91d 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum, /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + pm->timestamp_sec = skb->tstamp.off_sec; + pm->timestamp_usec = skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index aa11cf366efa..5027bbe6415e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ff5601ceedcb..efcd10f996ba 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -494,8 +494,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); + ts.sec = cpu_to_be64(skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f81fe8c52e99..eaa44c49567b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -492,8 +492,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); + ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a67a87384cc..499ae3df4a44 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -654,8 +654,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + h->tp_sec = skb->tstamp.off_sec; + h->tp_usec = skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; -- cgit v1.2.3 From 81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesnt suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the begining of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far away from the begining of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us immediatly skip the element without touching a second cache line in case of a miss. Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +-- include/linux/tc_ematch/tc_em_meta.h | 2 +- include/net/inet6_hashtables.h | 21 ++++++------ include/net/inet_hashtables.h | 64 +++++++++++++++++++++--------------- include/net/inet_timewait_sock.h | 2 +- include/net/sock.h | 5 +-- net/atm/common.c | 2 +- net/dccp/ipv4.c | 12 +++---- net/ipv4/inet_timewait_sock.c | 6 ++-- net/ipv4/tcp_ipv4.c | 11 ++++--- net/ipv6/tcp_ipv6.c | 18 +++++----- net/sched/em_meta.c | 6 ++-- 12 files changed, 85 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bb6f88e14061..e0b922785d98 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,8 +372,9 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 081b1ee8516e..e21937cf91d0 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -71,7 +71,7 @@ enum TCF_META_ID_SK_SNDBUF, TCF_META_ID_SK_ALLOCS, TCF_META_ID_SK_ROUTE_CAPS, - TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_HASH, TCF_META_ID_SK_LINGERTIME, TCF_META_ID_SK_ACK_BACKLOG, TCF_META_ID_SK_MAX_ACK_BACKLOG, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 03df3b157960..5a2beed5a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -26,19 +26,18 @@ struct inet_hashinfo; /* I have no idea if this is a good hash for v6 or not. -DaveM */ -static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const u16 fport, - const int ehash_size) +static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport) { - int hashent = (lport ^ fport); + unsigned int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent >> 16; hashent ^= hashent >> 8; - return (hashent & (ehash_size - 1)); + return hashent; } -static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet6_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ipv6_pinfo *np = inet6_sk(sk); @@ -46,7 +45,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) const struct in6_addr *faddr = &np->daddr; const __u16 lport = inet->num; const __u16 fport = inet->dport; - return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet6_ehashfn(laddr, lport, faddr, fport); } /* @@ -69,14 +68,14 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, - hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 646b6ea7fe26..35f49e65e295 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,7 +108,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; int bhash_size; - int ehash_size; + unsigned int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here @@ -130,17 +130,16 @@ struct inet_hashinfo { int port_rover; }; -static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport, - const int ehash_size) +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) { - int h = (laddr ^ lport) ^ (faddr ^ fport); + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); h ^= h >> 16; h ^= h >> 8; - return h & (ehash_size - 1); + return h; } -static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const __u32 laddr = inet->rcv_saddr; @@ -148,7 +147,14 @@ static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) const __u32 faddr = inet->daddr; const __u16 fport = inet->dport; - return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet_ehashfn(laddr, lport, faddr, fport); +} + +static inline struct inet_ehash_bucket *inet_ehash_bucket( + struct inet_hashinfo *hashinfo, + unsigned int hash) +{ + return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } extern struct inet_bind_bucket * @@ -235,9 +241,11 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); } else { - sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); - list = &hashinfo->ehash[sk->sk_hashent].chain; - lock = &hashinfo->ehash[sk->sk_hashent].lock; + struct inet_ehash_bucket *head; + sk->sk_hash = inet_sk_ehashfn(sk); + head = inet_ehash_bucket(hashinfo, sk->sk_hash); + list = &head->chain; + lock = &head->lock; write_lock(lock); } __sk_add_node(sk, list); @@ -268,9 +276,8 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; } else { - struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; - lock = &head->lock; - write_lock_bh(&head->lock); + lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; + write_lock_bh(lock); } if (__sk_del_node_init(sk)) @@ -337,23 +344,27 @@ sherry_cache: #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->daddr == (__saddr)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) @@ -378,18 +389,19 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 3b070352e869..4ade56ef3a4d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,6 +112,7 @@ struct inet_timewait_sock { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ @@ -126,7 +127,6 @@ struct inet_timewait_sock { /* And these are ours. */ __u8 tw_ipv6only:1; /* 31 bits hole, try to pack */ - int tw_hashent; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/sock.h b/include/net/sock.h index 8c48fbecb7cf..b6440805c420 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -99,6 +99,7 @@ struct proto; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header @@ -112,6 +113,7 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; @@ -139,7 +141,6 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used @@ -186,6 +187,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, @@ -208,7 +210,6 @@ struct sock { unsigned int sk_allocation; int sk_sndbuf; int sk_route_caps; - int sk_hashent; unsigned long sk_flags; unsigned long sk_lingertime; /* diff --git a/net/atm/common.c b/net/atm/common.c index 801a5813ec60..63feea49fb13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -46,7 +46,7 @@ static void __vcc_insert_socket(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)]; - sk->sk_hashent = vcc->vci & (VCC_HTABLE_SIZE - 1); + sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1); sk_add_node(sk, head); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 40fe6afacde6..ae088d1347af 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,27 +62,27 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, - dccp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); const struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } tw = NULL; /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -90,7 +90,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4d1502a49852..f9076ef3a1a8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { @@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -106,7 +106,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_dport = inet->dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; - tw->tw_hashent = sk->sk_hashent; + tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13dfb391cdf1..c85819d8474b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -130,19 +130,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +180,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -188,7 +189,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80643e6b346b..d693cb988b78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -209,9 +209,11 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (tcp_hashinfo.ehash_size - 1); + list = &tcp_hashinfo.ehash[hash].chain; + lock = &tcp_hashinfo.ehash[hash].lock; write_lock(lock); } @@ -322,13 +324,13 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, - tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ @@ -365,14 +367,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) goto not_unique; } unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); - sk->sk_hashent = hash; + sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); write_unlock(&head->lock); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 00eae5f9a01a..cf68a59fdc5a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -393,10 +393,10 @@ META_COLLECTOR(int_sk_route_caps) dst->value = skb->sk->sk_route_caps; } -META_COLLECTOR(int_sk_hashent) +META_COLLECTOR(int_sk_hash) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_hashent; + dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) @@ -515,7 +515,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), - [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), -- cgit v1.2.3 From 496a22b08fa326bf17c11eb900e0505aa9da3506 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 3 Oct 2005 14:16:34 -0700 Subject: [NET]: Fix "sysctl_net.c:36: error: 'core_table' undeclared here" During the build for ARM machine type "fortunet", this error occurred: CC net/sysctl_net.o net/sysctl_net.c:36: error: 'core_table' undeclared here (not in a function) It appears that the following configuration settings cause this error due to a missing include: CONFIG_SYSCTL=y CONFIG_NET=y # CONFIG_INET is not set core_table appears to be declared in net/sock.h. if CONFIG_INET were defined, net/sock.h would have been included via: sysctl_net.c -> net/ip.h -> linux/ip.h -> net/sock.h so include it directly. Signed-off-by: Russell King Signed-off-by: David S. Miller --- net/sysctl_net.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sysctl_net.c b/net/sysctl_net.c index c5241fcbb966..55538f6b60ff 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -16,6 +16,8 @@ #include #include +#include + #ifdef CONFIG_INET #include #endif -- cgit v1.2.3 From 444fc8fc3a1f926fa224655b8950bd853368c1a3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:18:10 -0700 Subject: [IPV4]: Fix "Proxy ARP seems broken" Meelis Roos wrote: > RK> My firewall setup relies on proxyarp working. However, with 2.6.14-rc3, > RK> it appears to be completely broken. The firewall is 212.18.232.186, > > Same here with some kernel between 14-rc2 and 14-rc3 - no reposnse to > ARP on a proxyarp gateway. Sorry, no exact revison and no more debugging > yet since it'a a production gateway. The breakage is caused by the change to use the CB area for flagging whether a packet has been queued due to proxy_delay. This area gets cleared every time arp_rcv gets called. Unfortunately packets delayed due to proxy_delay also go through arp_rcv when they are reprocessed. In fact, I can't think of a reason why delayed proxy packets should go through netfilter again at all. So the easiest solution is to bypass that and go straight to arp_process. This is essentially what would've happened before netfilter support was added to ARP. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/arp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8bf312bdea13..ec0e36893b01 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip, arp_xmit(skb); } -static void parp_redo(struct sk_buff *skb) -{ - nf_reset(skb); - arp_rcv(skb, skb->dev, NULL, skb->dev); -} - /* * Process an arp request. */ @@ -922,6 +916,11 @@ out: return 0; } +static void parp_redo(struct sk_buff *skb) +{ + arp_process(skb); +} + /* * Receive an arp request from the device layer. -- cgit v1.2.3 From f36d6ab182a5c68e92ea3e85821dde9d29bfe284 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 3 Oct 2005 14:19:15 -0700 Subject: [IPV6]: Fix ipv6 fragment ID selection at slow path Signed-Off-By: Yan Zheng Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2f589f24c093..563b442ffab8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -666,7 +666,7 @@ slow_path: */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (frag_id) { + if (!frag_id) { ipv6_select_ident(skb, fh); frag_id = fh->identification; } else -- cgit v1.2.3 From a5e7c210fefd2454c757a3542e41063407ca7108 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Oct 2005 14:21:58 -0700 Subject: [IPV6]: Fix leak added by udp connect dst caching fix. Based upon a patch from Mitsuru KANDA Signed-off-by: David S. Miller --- net/ipv6/udp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6001948600f3..e4cad11f284a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -852,10 +852,16 @@ do_append_data: else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - if (dst && connected) - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + if (dst) { + if (connected) { + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? + &np->daddr : NULL); + } else { + dst_release(dst); + } + } + if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); -- cgit v1.2.3 From e5ed639913eea3e4783a550291775ab78dd84966 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:35:55 -0700 Subject: [IPV4]: Replace __in_dev_get with __in_dev_get_rcu/rtnl The following patch renames __in_dev_get() to __in_dev_get_rtnl() and introduces __in_dev_get_rcu() to cover the second case. 1) RCU with refcnt should use in_dev_get(). 2) RCU without refcnt should use __in_dev_get_rcu(). 3) All others must hold RTNL and use __in_dev_get_rtnl(). There is one exception in net/ipv4/route.c which is in fact a pre-existing race condition. I've marked it as such so that we remember to fix it. This patch is based on suggestions and prior work by Suzanne Wood and Paul McKenney. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/wan/sdlamain.c | 23 ++++++++++++++--------- drivers/net/wan/syncppp.c | 2 +- drivers/net/wireless/strip.c | 4 ++-- drivers/parisc/led.c | 5 ++++- drivers/s390/net/qeth_main.c | 4 ++-- include/linux/inetdevice.h | 12 ++++++++++-- net/atm/clip.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 10 +++++----- net/ipv4/devinet.c | 22 +++++++++++----------- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/igmp.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ipmr.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/route.c | 6 ++++-- net/ipv6/addrconf.c | 2 +- net/irda/irlan/irlan_eth.c | 2 +- net/sctp/protocol.c | 2 +- 24 files changed, 73 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6d00c3de1a83..bf81cd45e4d4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2776,7 +2776,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev) return 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (!idev) goto out; diff --git a/drivers/net/wan/sdlamain.c b/drivers/net/wan/sdlamain.c index 74e151acef3e..7a8b22a7ea31 100644 --- a/drivers/net/wan/sdlamain.c +++ b/drivers/net/wan/sdlamain.c @@ -57,6 +57,7 @@ #include /* request_region(), release_region() */ #include /* WAN router definitions */ #include /* WANPIPE common user API definitions */ +#include #include #include /* phys_to_virt() */ @@ -1268,37 +1269,41 @@ unsigned long get_ip_address(struct net_device *dev, int option) struct in_ifaddr *ifaddr; struct in_device *in_dev; + unsigned long addr = 0; - if ((in_dev = __in_dev_get(dev)) == NULL){ - return 0; + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev)) == NULL){ + goto out; } if ((ifaddr = in_dev->ifa_list)== NULL ){ - return 0; + goto out; } switch (option){ case WAN_LOCAL_IP: - return ifaddr->ifa_local; + addr = ifaddr->ifa_local; break; case WAN_POINTOPOINT_IP: - return ifaddr->ifa_address; + addr = ifaddr->ifa_address; break; case WAN_NETMASK_IP: - return ifaddr->ifa_mask; + addr = ifaddr->ifa_mask; break; case WAN_BROADCAST_IP: - return ifaddr->ifa_broadcast; + addr = ifaddr->ifa_broadcast; break; default: - return 0; + break; } - return 0; +out: + rcu_read_unlock(); + return addr; } void add_gateway(sdla_t *card, struct net_device *dev) diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index b56a7b516d24..a6d3b55013a5 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -769,7 +769,7 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */ #ifdef CONFIG_INET rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { for (ifa=in_dev->ifa_list; ifa != NULL; ifa=ifa->ifa_next) { diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 4b0acae22b0d..7bc7fc823128 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1352,7 +1352,7 @@ static unsigned char *strip_make_packet(unsigned char *buffer, struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev == NULL) { rcu_read_unlock(); return NULL; @@ -1508,7 +1508,7 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb) brd = addr = 0; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev) { if (in_dev->ifa_list) { brd = in_dev->ifa_list->ifa_broadcast; diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index e90fb72a6962..286902298e33 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -358,9 +359,10 @@ static __inline__ int led_get_net_activity(void) /* we are running as tasklet, so locking dev_base * for reading should be OK */ read_lock(&dev_base_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { struct net_device_stats *stats; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (LOOPBACK(in_dev->ifa_list->ifa_local)) @@ -371,6 +373,7 @@ static __inline__ int led_get_net_activity(void) rx_total += stats->rx_packets; tx_total += stats->tx_packets; } + rcu_read_unlock(); read_unlock(&dev_base_lock); retval = 0; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 86582cf1e19e..71de834ece1a 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -5200,7 +5200,7 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid) if (!card->vlangrp) return; rcu_read_lock(); - in_dev = __in_dev_get(card->vlangrp->vlan_devices[vid]); + in_dev = __in_dev_get_rcu(card->vlangrp->vlan_devices[vid]); if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { @@ -7725,7 +7725,7 @@ qeth_arp_constructor(struct neighbour *neigh) goto out; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7e1e15f934f3..fd7af86151b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,13 +142,21 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } +static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev = rcu_dereference(in_dev); + return in_dev; +} + static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); - in_dev = dev->ip_ptr; + in_dev = __in_dev_get_rcu(dev); if (in_dev) atomic_inc(&in_dev->refcnt); rcu_read_unlock(); @@ -156,7 +164,7 @@ in_dev_get(const struct net_device *dev) } static __inline__ struct in_device * -__in_dev_get(const struct net_device *dev) +__in_dev_get_rtnl(const struct net_device *dev) { return (struct in_device*)dev->ip_ptr; } diff --git a/net/atm/clip.c b/net/atm/clip.c index 28dab55a4387..4f54c9a5e84a 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -310,7 +310,7 @@ static int clip_constructor(struct neighbour *neigh) if (neigh->type != RTN_UNICAST) return -EINVAL; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return -EINVAL; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5265dfd69928..802fe11efad0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -703,7 +703,7 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip) { rcu_read_lock(); - in_dev = __in_dev_get(ndev); + in_dev = __in_dev_get_rcu(ndev); if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b7f2d65a614f..44de070b6045 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1667,7 +1667,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); + in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { if (in_dev->ifa_list) { pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 4a62093eb343..34fdac51df96 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -406,7 +406,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned long network = 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (idev) { if (idev->ifa_list) network = ntohl(idev->ifa_list->ifa_address) & diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ec0e36893b01..b425748f02d7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = inet_addr_type(addr); rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; @@ -989,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 1; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 1; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; return 0; } return -ENXIO; @@ -1095,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 0; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 0; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ba2895ae8151..74f2207e131a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto out; rc = -ENOBUFS; - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) goto out; @@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if (colon) *colon = ':'; - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -748,7 +748,7 @@ rarok: static int inet_gifconf(struct net_device *dev, char __user *buf, int len) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -791,7 +791,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; @@ -818,7 +818,7 @@ no_in_dev: read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; for_primary_ifa(in_dev) { @@ -887,7 +887,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop if (dev) { rcu_read_lock(); - if ((in_dev = __in_dev_get(dev))) + if ((in_dev = __in_dev_get_rcu(dev))) addr = confirm_addr_indev(in_dev, dst, local, scope); rcu_read_unlock(); @@ -897,7 +897,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev))) { + if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; @@ -957,7 +957,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1078,7 +1078,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); continue; } @@ -1149,7 +1149,7 @@ void inet_forward_change(void) for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) in_dev->cnf.forwarding = on; rcu_read_unlock(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e1379f71269..e61bc7177eb1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, no_addr = rpf = 0; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); @@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d41219e8037c..186f20c4a45e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, rta->rta_oif = &dev->ifindex; if (colon) { struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; @@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev) } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) + if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 70c44e4c3ceb..8b6d3939e1e6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) } if (dev) { imr->imr_ifindex = dev->ifindex; - idev = __in_dev_get(dev); + idev = __in_dev_get_rtnl(dev); } return idev; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f0d5740d7e22..896ce3f8f53a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); - if (__in_dev_get(dev) == NULL) + if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; - ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9dbf5909f3a6..302b7eb507c9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; in_dev->cnf.rp_filter = 0; @@ -278,7 +278,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { in_dev->cnf.mc_forwarding--; ip_rt_multicast_event(in_dev); } @@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; in_dev->cnf.mc_forwarding++; dev_set_allmulti(dev, +1); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 577bac22dcc6..186646eb249f 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb, goto out; rcu_read_lock(); - in_dev = __in_dev_get(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 715cb613405c..5245bfd33d52 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get((*pskb)->dev); + indev = __in_dev_get_rcu((*pskb)->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8549f26e2495..381dd6a6aebb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, struct in_device *in_dev; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { int our = ip_check_mc(in_dev, daddr, saddr, skb->nh.iph->protocol); if (our @@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) err = -ENODEV; if (dev_out == NULL) goto out; - if (__in_dev_get(dev_out) == NULL) { + + /* RACE: Check return value of inet_select_addr instead. */ + if (__in_dev_get_rtnl(dev_out) == NULL) { dev_put(dev_out); goto out; /* Wrong error code */ } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e509e52fbc1..a970b4727ce8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1806,7 +1806,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for (dev = dev_base; dev != NULL; dev = dev->next) { - struct in_device * in_dev = __in_dev_get(dev); + struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 071cd2cefd8a..953e255d2bc8 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -310,7 +310,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) #ifdef CONFIG_INET IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) goto out; if (in_dev->ifa_list) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e7025be77691..f01d1c9002a1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -147,7 +147,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, struct sctp_sockaddr_entry *addr; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); return; } -- cgit v1.2.3 From 3e56a40bb36f1f73b4eac2ffe267c5357811e321 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:36:32 -0700 Subject: [IPV4]: Get rid of bogus __in_put_dev in pktgen This patch gets rid of a bogus __in_dev_put() in pktgen.c. This was spotted by Suzanne Wood. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/pktgen.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 44de070b6045..5f043d346694 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1673,7 +1673,6 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; pkt_dev->saddr_max = pkt_dev->saddr_min; } - __in_dev_put(in_dev); } rcu_read_unlock(); } -- cgit v1.2.3 From 7ce312467edc270fcbd8a699efabb37ce1802b98 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Oct 2005 16:07:30 -0700 Subject: [IPV4]: Update icmp sysctl docs and disable broadcast ECHO/TIMESTAMP by default It's not a good idea to be smurf'able by default. The few people who need this can turn it on. Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 10 +++++++--- net/ipv4/icmp.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab65714d95fc..b433c8a27e2d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -355,10 +355,14 @@ ip_dynaddr - BOOLEAN Default: 0 icmp_echo_ignore_all - BOOLEAN + If set non-zero, then the kernel will ignore all ICMP ECHO + requests sent to it. + Default: 0 + icmp_echo_ignore_broadcasts - BOOLEAN - If either is set to true, then the kernel will ignore either all - ICMP ECHO requests sent to it or just those to broadcast/multicast - addresses, respectively. + If set non-zero, then the kernel will ignore all ICMP ECHO and + TIMESTAMP requests sent to it via broadcast/multicast. + Default: 1 icmp_ratelimit - INTEGER Limit the maximal rates for sending ICMP packets whose type matches diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 24eb56ae1b5a..90dca711ac9f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = { /* Control parameters for ECHO replies. */ int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts; +int sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ int sysctl_icmp_ignore_bogus_error_responses; -- cgit v1.2.3