From 9bbc768aa911a3ef336272eaa6d220abfba8ce50 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 04:07:29 +0100 Subject: netfilter: ipv4: use NFPROTO values for NF_HOOK invocation The semantic patch that was used: // @@ @@ (NF_HOOK |NF_HOOK_COND |nf_hook )( -PF_INET, +NFPROTO_IPV4, ...) // Signed-off-by: Jan Engelhardt --- net/ipv4/ip_output.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3451799e3dbf..f09135e1e14f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -95,8 +95,8 @@ int __ip_local_out(struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, + skb_dst(skb)->dev, dst_output); } int ip_local_out(struct sk_buff *skb) @@ -271,8 +271,8 @@ int ip_mc_output(struct sk_buff *skb) ) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, - NULL, newskb->dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, + newskb, NULL, newskb->dev, ip_dev_loopback_xmit); } @@ -287,12 +287,12 @@ int ip_mc_output(struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL, - newskb->dev, ip_dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, + NULL, newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, + skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -305,7 +305,7 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.2.3 From 4e15ed4d930297c127d280ca1d0c785be870def4 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 15 Apr 2010 16:43:08 +0000 Subject: net: replace ipfragok with skb->local_df As Herbert Xu said: we should be able to simply replace ipfragok with skb->local_df. commit f88037(sctp: Drop ipfargok in sctp_xmit function) has droped ipfragok and set local_df value properly. The patch kills the ipfragok parameter of .queue_xmit(). Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 2 +- include/net/inet_connection_sock.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 3 +-- net/dccp/ipv6.c | 4 ++-- net/dccp/output.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- net/ipv6/inet6_connection_sock.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/l2tp/l2tp_core.c | 3 ++- net/l2tp/l2tp_ip.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- 15 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index f13ddc2543b1..aae08f686633 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -38,5 +38,5 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); -extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok); +extern int inet6_csk_xmit(struct sk_buff *skb); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 52c8b8b8a0b9..b6d3b55da19b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -36,7 +36,7 @@ struct tcp_congestion_ops; * (i.e. things that depend on the address family) */ struct inet_connection_sock_af_ops { - int (*queue_xmit)(struct sk_buff *skb, int ipfragok); + int (*queue_xmit)(struct sk_buff *skb); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); int (*conn_request)(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index 503994a38ed1..a84ceb692687 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -101,7 +101,7 @@ extern int ip_do_nat(struct sk_buff *skb); extern void ip_send_check(struct iphdr *ip); extern int __ip_local_out(struct sk_buff *skb); extern int ip_local_out(struct sk_buff *skb); -extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok); +extern int ip_queue_xmit(struct sk_buff *skb); extern void ip_init(void); extern int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 033ddd4652a5..b1d8db90b214 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -482,8 +482,7 @@ extern int ip6_rcv_finish(struct sk_buff *skb); extern int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct ipv6_txoptions *opt, - int ipfragok); + struct ipv6_txoptions *opt); extern int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ab1ab95946df..091698899594 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -292,7 +292,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt, 0); + err = ip6_xmit(sk, skb, &fl, opt); err = net_xmit_eval(err); } @@ -347,7 +347,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL, 0); + ip6_xmit(ctl_sk, skb, &fl, NULL); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; diff --git a/net/dccp/output.c b/net/dccp/output.c index b8d98e3c052a..e98b65e9569f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -136,7 +136,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936e..512af81b750f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -311,7 +311,7 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_queue_xmit(struct sk_buff *skb, int ipfragok) +int ip_queue_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -370,7 +370,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) + if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e46849989a53..2b7d71fb8439 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -890,7 +890,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb); if (likely(err <= 0)) return err; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 628db24bcf22..0c5e3c3b7fd5 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -178,7 +178,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -234,7 +234,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) /* Restore final destination back after routing done */ ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f3a847e3ec88..141819f0c6f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -185,7 +185,7 @@ int ip6_output(struct sk_buff *skb) */ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct ipv6_txoptions *opt, int ipfragok) + struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b429dfdd69dc..bd5ef7b6e48e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt, 0); + err = ip6_xmit(sk, skb, &fl, opt); err = net_xmit_eval(err); } @@ -1071,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 98dfcce1a5fc..ecc7aea9efe4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -954,7 +954,8 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat } /* Queue the packet to IP for output */ - error = ip_queue_xmit(skb, 1); + skb->local_df = 1; + error = ip_queue_xmit(skb); /* Update stats */ if (error >= 0) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 75bf784ba18d..0852512d392c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -501,7 +501,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m skb_dst_set(skb, dst_clone(&rt->u.dst)); /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, 0); + rc = ip_queue_xmit(skb); error: /* Update stats */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 14db5689fb89..732689140fb8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } /* Returns the dst cache entry for the given source and destination ip diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a56f98e82f92..704298f4b284 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -854,7 +854,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, 0); + return ip_queue_xmit(skb); } static struct sctp_af sctp_af_inet; -- cgit v1.2.3 From e281b19897dc21c1071802808d461627d747a877 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 14:17:47 +0200 Subject: netfilter: xtables: inclusion of xt_TEE xt_TEE can be used to clone and reroute a packet. This can for example be used to copy traffic at a router for logging purposes to another dedicated machine. References: http://www.gossamer-threads.com/lists/iptables/devel/68781 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_TEE.h | 9 ++ net/ipv4/ip_output.c | 1 + net/ipv6/ip6_output.c | 1 + net/netfilter/Kconfig | 7 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_TEE.c | 256 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 276 insertions(+) create mode 100644 include/linux/netfilter/xt_TEE.h create mode 100644 net/netfilter/xt_TEE.c (limited to 'net/ipv4/ip_output.c') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index a5a63e41b8af..48767cd16453 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -16,6 +16,7 @@ header-y += xt_RATEEST.h header-y += xt_SECMARK.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h +header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h new file mode 100644 index 000000000000..55d4a5011523 --- /dev/null +++ b/include/linux/netfilter/xt_TEE.h @@ -0,0 +1,9 @@ +#ifndef _XT_TEE_TARGET_H +#define _XT_TEE_TARGET_H + +struct xt_tee_tginfo { + union nf_inet_addr gw; + char oif[16]; +}; + +#endif /* _XT_TEE_TARGET_H */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f09135e1e14f..0abfddec1e26 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } +EXPORT_SYMBOL_GPL(ip_output); int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c10a38a71a5e..d09be7ff8735 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb) ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +EXPORT_SYMBOL_GPL(ip6_output); /* * xmit an sk_buff (used by TCP) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8055786b7702..673a6c8f0e95 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TEE + tristate '"TEE" - packet cloning to alternate destiantion' + depends on NETFILTER_ADVANCED + ---help--- + This option adds a "TEE" target with which a packet can be cloned and + this clone be rerouted to another nexthop. + config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target support (EXPERIMENTAL)' depends on EXPERIMENTAL diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index cd31afe0692a..14e3a8fd8180 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c new file mode 100644 index 000000000000..b3d730163f12 --- /dev/null +++ b/net/netfilter/xt_TEE.c @@ -0,0 +1,256 @@ +/* + * "TEE" target extension for Xtables + * Copyright © Sebastian Claßen, 2007 + * Jan Engelhardt, 2007-2010 + * + * based on ipt_ROUTE.c from Cédric de Launois + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 or later, as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +# define WITH_CONNTRACK 1 +# include +#endif +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +# define WITH_IPV6 1 +#endif + +static const union nf_inet_addr tee_zero_address; + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool tee_tg_route_oif(struct flowi *f, struct net *net, + const struct xt_tee_tginfo *info) +{ + const struct net_device *dev; + + if (*info->oif != '\0') + return true; + dev = dev_get_by_name(net, info->oif); + if (dev == NULL) + return false; + f->oif = dev->ifindex; + return true; +} + +static bool +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct iphdr *iph = ip_hdr(skb); + struct net *net = pick_net(skb); + struct rtable *rt; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (!tee_tg_route_oif(&fl, net, info)) + return false; + fl.nl_u.ip4_u.daddr = info->gw.ip; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + if (ip_route_output_key(net, &rt, &fl) != 0) + return false; + + dst_release(skb_dst(skb)); + skb_dst_set(skb, &rt->u.dst); + skb->dev = rt->u.dst.dev; + skb->protocol = htons(ETH_P_IP); + return true; +} + +static unsigned int +tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + struct iphdr *iph; + + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the TEE + * --gateway. + */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + /* Avoid counting cloned packets towards the original connection. */ + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + /* + * If we are in PREROUTING/INPUT, the checksum must be recalculated + * since the length could have changed as a result of defragmentation. + * + * We also decrease the TTL to mitigate potential TEE loops + * between two hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + ip_send_check(iph); + + /* + * Xtables is not reentrant currently, so a choice has to be made: + * 1. return absolute verdict for the original and let the cloned + * packet travel through the chains + * 2. let the original continue travelling and not pass the clone + * to Xtables. + * #2 is chosen. Normally, we would use ip_local_out for the clone. + * Because iph->check is already correct and we don't pass it to + * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can + * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not + * invoke POSTROUTING on the cloned packet. + */ + IPCB(skb)->flags |= IPSKB_REROUTED; + if (tee_tg_route4(skb, info)) + ip_output(skb); + else + kfree_skb(skb); + + return XT_CONTINUE; +} + +#ifdef WITH_IPV6 +static bool +tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = pick_net(skb); + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (!tee_tg_route_oif(&fl, net, info)) + return false; + fl.nl_u.ip6_u.daddr = info->gw.in6; + fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + dst = ip6_route_output(net, NULL, &fl); + if (dst == NULL) + return false; + + dst_release(skb_dst(skb)); + skb_dst_set(skb, dst); + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + return true; +} + +static unsigned int +tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) { + struct ipv6hdr *iph = ipv6_hdr(skb); + --iph->hop_limit; + } + IP6CB(skb)->flags |= IP6SKB_REROUTED; + if (tee_tg_route6(skb, info)) + ip6_output(skb); + else + kfree_skb(skb); + + return XT_CONTINUE; +} +#endif /* WITH_IPV6 */ + +static int tee_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + + if (info->oif[sizeof(info->oif)-1] != '\0') + return -EINVAL; + /* 0.0.0.0 and :: not allowed */ + return (memcmp(&info->gw, &tee_zero_address, + sizeof(tee_zero_address)) == 0) ? -EINVAL : 0; +} + +static struct xt_target tee_tg_reg[] __read_mostly = { + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV4, + .target = tee_tg4, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .me = THIS_MODULE, + }, +#ifdef WITH_IPV6 + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV6, + .target = tee_tg6, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init tee_tg_init(void) +{ + return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +static void __exit tee_tg_exit(void) +{ + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +module_init(tee_tg_init); +module_exit(tee_tg_exit); +MODULE_AUTHOR("Sebastian Claßen "); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("Xtables: Reroute packet copy"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_TEE"); +MODULE_ALIAS("ip6t_TEE"); -- cgit v1.2.3 From cd58bcd9787ef4c16ab6e442c4f1bf3539b3ab39 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 19 Apr 2010 16:06:52 +0200 Subject: netfilter: xt_TEE: have cloned packet travel through Xtables too Since Xtables is now reentrant/nestable, the cloned packet can also go through Xtables and be subject to rules itself. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - net/netfilter/xt_TEE.c | 40 ++++++++++++++++++---------------------- 3 files changed, 18 insertions(+), 24 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0abfddec1e26..f09135e1e14f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -309,7 +309,6 @@ int ip_output(struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } -EXPORT_SYMBOL_GPL(ip_output); int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d09be7ff8735..c10a38a71a5e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -176,7 +176,6 @@ int ip6_output(struct sk_buff *skb) ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -EXPORT_SYMBOL_GPL(ip6_output); /* * xmit an sk_buff (used by TCP) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index b3d730163f12..842e7012eca7 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #endif static const union nf_inet_addr tee_zero_address; +static DEFINE_PER_CPU(bool, tee_active); static struct net *pick_net(struct sk_buff *skb) { @@ -91,6 +93,8 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) const struct xt_tee_tginfo *info = par->targinfo; struct iphdr *iph; + if (percpu_read(tee_active)) + return XT_CONTINUE; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has @@ -125,24 +129,13 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par) --iph->ttl; ip_send_check(iph); - /* - * Xtables is not reentrant currently, so a choice has to be made: - * 1. return absolute verdict for the original and let the cloned - * packet travel through the chains - * 2. let the original continue travelling and not pass the clone - * to Xtables. - * #2 is chosen. Normally, we would use ip_local_out for the clone. - * Because iph->check is already correct and we don't pass it to - * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can - * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not - * invoke POSTROUTING on the cloned packet. - */ - IPCB(skb)->flags |= IPSKB_REROUTED; - if (tee_tg_route4(skb, info)) - ip_output(skb); - else + if (tee_tg_route4(skb, info)) { + percpu_write(tee_active, true); + ip_local_out(skb); + percpu_write(tee_active, false); + } else { kfree_skb(skb); - + } return XT_CONTINUE; } @@ -177,6 +170,8 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + if (percpu_read(tee_active)) + return XT_CONTINUE; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) return XT_CONTINUE; @@ -192,12 +187,13 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par) struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } - IP6CB(skb)->flags |= IP6SKB_REROUTED; - if (tee_tg_route6(skb, info)) - ip6_output(skb); - else + if (tee_tg_route6(skb, info)) { + percpu_write(tee_active, true); + ip6_local_out(skb); + percpu_write(tee_active, false); + } else { kfree_skb(skb); - + } return XT_CONTINUE; } #endif /* WITH_IPV6 */ -- cgit v1.2.3 From 6c79bf0f2440fd250c8fce8d9b82fcf03d4e8350 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Tue, 20 Apr 2010 16:22:01 +0200 Subject: netfilter: bridge-netfilter: fix refragmenting IP traffic encapsulated in PPPoE traffic The MTU for IP traffic encapsulated inside PPPoE traffic is smaller than the MTU of the Ethernet device (1500). Connection tracking gathers all IP packets and sometimes will refragment them in ip_fragment(). We then need to subtract the length of the encapsulating header from the mtu used in ip_fragment(). The check in br_nf_dev_queue_xmit() which determines if ip_fragment() has to be called is also updated for the PPPoE-encapsulated packets. nf_bridge_copy_header() is also updated to make sure the PPPoE data length field has the correct value. Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge.h | 7 +++++++ net/bridge/br_netfilter.c | 2 +- net/ipv4/ip_output.c | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net/ipv4/ip_output.c') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ea0e44b90432..0ddd161f3b06 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -68,6 +68,13 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } +static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + return PPPOE_SES_HLEN; + return 0; +} + extern int br_handle_frame_finish(struct sk_buff *skb); /* Only used in br_device.c */ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6b80ebc37667..93f80fefa496 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -745,7 +745,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && - skb->len > skb->dev->mtu && + skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b0b2e3059f11..d979710684b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -469,6 +469,10 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) + mtu -= nf_bridge_mtu_reduction(skb); +#endif IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: -- cgit v1.2.3 From ab6e3feba1f1bc3b9418b854da6f481408d243de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 11:31:49 +0000 Subject: net: No dst refcounting in ip_queue_xmit() TCP outgoing packets can avoid two atomic ops, and dirtying of previously higly contended cache line using new refdst infrastructure. Note 1: loopback device excluded because of !IFF_XMIT_DST_RELEASE Note 2: UDP packets dsts are built before ip_queue_xmit(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 252897443ef9..9a4a6c96cb0d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -318,10 +318,12 @@ int ip_queue_xmit(struct sk_buff *skb) struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; + int res; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. */ + rcu_read_lock(); rt = skb_rtable(skb); if (rt != NULL) goto packet_routed; @@ -359,7 +361,7 @@ int ip_queue_xmit(struct sk_buff *skb) } sk_setup_caps(sk, &rt->u.dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set_noref(skb, &rt->u.dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -391,9 +393,12 @@ packet_routed: skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - return ip_local_out(skb); + res = ip_local_out(skb); + rcu_read_unlock(); + return res; no_route: + rcu_read_unlock(); IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; -- cgit v1.2.3