summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2009-06-11 20:00:44 -0700
committer David S. Miller <davem@davemloft.net> 2009-06-11 20:00:44 -0700
commit adf76cfe24dab32a54e2dd1f51534cea8277f32a (patch)
tree 6935c74a4b7237bd5f95918b3145ac57e0769fca /net
parent 17d0cdfa8f3c09a110061c67421d662b3e149d0a (diff)
parent 24992eacd8a9f4af286bdaaab627b6802ceb8bce (diff)
download lwn-adf76cfe24dab32a54e2dd1f51534cea8277f32a.tar.gz
lwn-adf76cfe24dab32a54e2dd1f51534cea8277f32a.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/netfilter/ebtables.c 18
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 109
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 2
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 172
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 12
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 17
-rw-r--r-- net/ipv6/netfilter/ip6_queue.c 2
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 170
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 17
-rw-r--r-- net/netfilter/Kconfig 13
-rw-r--r-- net/netfilter/Makefile 1
-rw-r--r-- net/netfilter/nf_conntrack_core.c 30
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c 83
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 263
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 31
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 3
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 27
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 140
-rw-r--r-- net/netfilter/nf_queue.c 4
-rw-r--r-- net/netfilter/nfnetlink.c 28
-rw-r--r-- net/netfilter/x_tables.c 42
-rw-r--r-- net/netfilter/xt_NFQUEUE.c 93
-rw-r--r-- net/netfilter/xt_osf.c 428
-rw-r--r-- net/netfilter/xt_socket.c 63
25 files changed, 1189 insertions, 581 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 820252aee81f..37928d5f2840 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,6 +142,12 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
return 0;
}
+static inline __pure
+struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
/* Do some firewalling */
unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
@@ -164,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
mtpar.in = tgpar.in = in;
mtpar.out = tgpar.out = out;
mtpar.hotdrop = &hotdrop;
- tgpar.hooknum = hook;
+ mtpar.hooknum = tgpar.hooknum = hook;
read_lock_bh(&table->lock);
private = table->private;
@@ -249,8 +255,7 @@ letsreturn:
/* jump to a udc */
cs[sp].n = i + 1;
cs[sp].chaininfo = chaininfo;
- cs[sp].e = (struct ebt_entry *)
- (((char *)point) + point->next_offset);
+ cs[sp].e = ebt_next_entry(point);
i = 0;
chaininfo = (struct ebt_entries *) (base + verdict);
#ifdef CONFIG_NETFILTER_DEBUG
@@ -266,8 +271,7 @@ letsreturn:
sp++;
continue;
letscontinue:
- point = (struct ebt_entry *)
- (((char *)point) + point->next_offset);
+ point = ebt_next_entry(point);
i++;
}
@@ -787,7 +791,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
/* this can't be 0, so the loop test is correct */
cl_s[i].cs.n = pos + 1;
pos = 0;
- cl_s[i].cs.e = ((void *)e + e->next_offset);
+ cl_s[i].cs.e = ebt_next_entry(e);
e = (struct ebt_entry *)(hlp2->data);
nentries = hlp2->nentries;
cl_s[i].from = chain_nr;
@@ -797,7 +801,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
continue;
}
letscontinue:
- e = (void *)e + e->next_offset;
+ e = ebt_next_entry(e);
pos++;
}
return 0;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 831fe1879dc0..7505dff4ffdf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -231,6 +231,12 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
@@ -267,67 +273,64 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
- struct arpt_entry_target *t;
- int hdr_len;
-
- hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * skb->dev->addr_len);
+ struct arpt_entry_target *t;
+ int hdr_len;
- ADD_COUNTER(e->counters, hdr_len, 1);
+ if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ e = arpt_next_entry(e);
+ continue;
+ }
- t = arpt_get_target(e);
+ hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+ (2 * skb->dev->addr_len);
+ ADD_COUNTER(e->counters, hdr_len, 1);
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
+ t = arpt_get_target(e);
- v = ((struct arpt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != ARPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v
- != (void *)e + e->next_offset) {
- /* Save old back ptr in next entry */
- struct arpt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom =
- (void *)back - table_base;
-
- /* set back pointer to next entry */
- back = next;
- }
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- * abs. verdicts
- */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
-
- /* Target might have changed stuff. */
- arp = arp_hdr(skb);
-
- if (verdict == ARPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
+ v = ((struct arpt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != ARPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
break;
+ }
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
}
- } else {
- e = (void *)e + e->next_offset;
+ if (table_base + v
+ != arpt_next_entry(e)) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next = arpt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
}
+
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb, &tgpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == ARPT_CONTINUE)
+ e = arpt_next_entry(e);
+ else
+ /* Verdict */
+ break;
} while (!hotdrop);
xt_info_rdunlock_bh();
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5f22c91c6e15..c156db215987 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -596,7 +596,7 @@ static int __init ip_queue_init(void)
#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
#endif
- status = nf_register_queue_handler(PF_INET, &nfqh);
+ status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip_queue: failed to register queue handler\n");
goto cleanup_sysctl;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2ec8d7290c40..fdefae6b5dfc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -238,8 +238,8 @@ static struct nf_loginfo trace_loginfo = {
/* Mildly perf critical (only if packet tracing is on) */
static inline int
get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
- char *hookname, char **chainname,
- char **comment, unsigned int *rulenum)
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
{
struct ipt_standard_target *t = (void *)ipt_get_target(s);
@@ -257,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
&& unconditional(&s->ip)) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
- ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
- : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
+ ? comments[NF_IP_TRACE_COMMENT_POLICY]
+ : comments[NF_IP_TRACE_COMMENT_RETURN];
}
return 1;
} else
@@ -277,14 +277,14 @@ static void trace_packet(struct sk_buff *skb,
{
void *table_base;
const struct ipt_entry *root;
- char *hookname, *chainname, *comment;
+ const char *hookname, *chainname, *comment;
unsigned int rulenum = 0;
- table_base = (void *)private->entries[smp_processor_id()];
+ table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
- hookname = chainname = (char *)hooknames[hook];
- comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP_TRACE_COMMENT_RULE];
IPT_ENTRY_ITERATE(root,
private->size - private->hook_entry[hook],
@@ -297,6 +297,12 @@ static void trace_packet(struct sk_buff *skb,
}
#endif
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff *skb,
@@ -305,6 +311,8 @@ ipt_do_table(struct sk_buff *skb,
const struct net_device *out,
struct xt_table *table)
{
+#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
+
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
u_int16_t datalen;
@@ -335,7 +343,7 @@ ipt_do_table(struct sk_buff *skb,
mtpar.in = tgpar.in = in;
mtpar.out = tgpar.out = out;
mtpar.family = tgpar.family = NFPROTO_IPV4;
- tgpar.hooknum = hook;
+ mtpar.hooknum = tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
xt_info_rdlock_bh();
@@ -348,92 +356,84 @@ ipt_do_table(struct sk_buff *skb,
back = get_entry(table_base, private->underflow[hook]);
do {
+ struct ipt_entry_target *t;
+
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- if (ip_packet_match(ip, indev, outdev,
- &e->ip, mtpar.fragoff)) {
- struct ipt_entry_target *t;
-
- if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
- goto no_match;
+ if (!ip_packet_match(ip, indev, outdev,
+ &e->ip, mtpar.fragoff) ||
+ IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+ e = ipt_next_entry(e);
+ continue;
+ }
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
- t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- /* The packet is traced: log it */
- if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, in, out,
- table->name, private, e);
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
#endif
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct ipt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != IPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v != (void *)e + e->next_offset
- && !(e->ip.flags & IPT_F_GOTO)) {
- /* Save old back ptr in next entry */
- struct ipt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom
- = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ipt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
}
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
+ }
+ if (table_base + v != ipt_next_entry(e)
+ && !(e->ip.flags & IPT_F_GOTO)) {
+ /* Save old back ptr in next entry */
+ struct ipt_entry *next = ipt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
+ }
+
+ /* Targets which reenter must return
+ abs. verdicts */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- abs. verdicts */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ipt_entry *)table_base)->comefrom
- = 0xeeeeeeec;
+ tb_comefrom = 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
+ verdict = t->u.kernel.target->target(skb, &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
- if (((struct ipt_entry *)table_base)->comefrom
- != 0xeeeeeeec
- && verdict == IPT_CONTINUE) {
- printk("Target %s reentered!\n",
- t->u.kernel.target->name);
- verdict = NF_DROP;
- }
- ((struct ipt_entry *)table_base)->comefrom
- = 0x57acc001;
+ if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ tb_comefrom = 0x57acc001;
#endif
- /* Target might have changed stuff. */
- ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
-
- if (verdict == IPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
- break;
- }
- } else {
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
- no_match:
- e = (void *)e + e->next_offset;
- }
+ if (verdict == IPT_CONTINUE)
+ e = ipt_next_entry(e);
+ else
+ /* Verdict */
+ break;
} while (!hotdrop);
xt_info_rdunlock_bh();
@@ -444,6 +444,8 @@ ipt_do_table(struct sk_buff *skb,
return NF_DROP;
else return verdict;
#endif
+
+#undef tb_comefrom
}
/* Figures out from what hook each rule can be called: returns 0 if
@@ -2158,7 +2160,7 @@ static bool icmp_checkentry(const struct xt_mtchk_param *par)
static struct xt_target ipt_standard_target __read_mostly = {
.name = IPT_STANDARD_TARGET,
.targetsize = sizeof(int),
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
@@ -2170,7 +2172,7 @@ static struct xt_target ipt_error_target __read_mostly = {
.name = IPT_ERROR_TARGET,
.target = ipt_error,
.targetsize = IPT_FUNCTION_MAXNAMELEN,
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
};
static struct nf_sockopt_ops ipt_sockopts = {
@@ -2196,17 +2198,17 @@ static struct xt_match icmp_matchstruct __read_mostly = {
.matchsize = sizeof(struct ipt_icmp),
.checkentry = icmp_checkentry,
.proto = IPPROTO_ICMP,
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
};
static int __net_init ip_tables_net_init(struct net *net)
{
- return xt_proto_init(net, AF_INET);
+ return xt_proto_init(net, NFPROTO_IPV4);
}
static void __net_exit ip_tables_net_exit(struct net *net)
{
- xt_proto_fini(net, AF_INET);
+ xt_proto_fini(net, NFPROTO_IPV4);
}
static struct pernet_operations ip_tables_net_ops = {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index c0992c75bdac..dada0863946d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -27,9 +27,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
-/* Lock protects masq region inside conntrack */
-static DEFINE_RWLOCK(masq_lock);
-
/* FIXME: Multiple targets. --RR */
static bool masquerade_tg_check(const struct xt_tgchk_param *par)
{
@@ -79,9 +76,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_DROP;
}
- write_lock_bh(&masq_lock);
nat->masq_index = par->out->ifindex;
- write_unlock_bh(&masq_lock);
/* Transfer from original range. */
newrange = ((struct nf_nat_range)
@@ -97,16 +92,11 @@ static int
device_cmp(struct nf_conn *i, void *ifindex)
{
const struct nf_conn_nat *nat = nfct_nat(i);
- int ret;
if (!nat)
return 0;
- read_lock_bh(&masq_lock);
- ret = (nat->masq_index == (int)(long)ifindex);
- read_unlock_bh(&masq_lock);
-
- return ret;
+ return nat->masq_index == (int)(long)ifindex;
}
static int masq_device_event(struct notifier_block *this,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 23b2c2ee869a..d71ba7677344 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -82,18 +82,10 @@ static int icmp_packet(struct nf_conn *ct,
u_int8_t pf,
unsigned int hooknum)
{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count))
- nf_ct_kill_acct(ct, ctinfo, skb);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
- }
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
return NF_ACCEPT;
}
@@ -117,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
return false;
}
- atomic_set(&ct->proto.icmp.count, 0);
return true;
}
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index b693f841aeb4..1cf3f0c6a959 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -598,7 +598,7 @@ static int __init ip6_queue_init(void)
#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
#endif
- status = nf_register_queue_handler(PF_INET6, &nfqh);
+ status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
goto cleanup_sysctl;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 219e165aea10..ced1f2c0cb65 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -270,8 +270,8 @@ static struct nf_loginfo trace_loginfo = {
/* Mildly perf critical (only if packet tracing is on) */
static inline int
get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
- char *hookname, char **chainname,
- char **comment, unsigned int *rulenum)
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
{
struct ip6t_standard_target *t = (void *)ip6t_get_target(s);
@@ -289,8 +289,8 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
&& unconditional(&s->ipv6)) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
- ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY]
- : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN];
+ ? comments[NF_IP6_TRACE_COMMENT_POLICY]
+ : comments[NF_IP6_TRACE_COMMENT_RETURN];
}
return 1;
} else
@@ -309,14 +309,14 @@ static void trace_packet(struct sk_buff *skb,
{
void *table_base;
const struct ip6t_entry *root;
- char *hookname, *chainname, *comment;
+ const char *hookname, *chainname, *comment;
unsigned int rulenum = 0;
- table_base = (void *)private->entries[smp_processor_id()];
+ table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
- hookname = chainname = (char *)hooknames[hook];
- comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE];
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP6_TRACE_COMMENT_RULE];
IP6T_ENTRY_ITERATE(root,
private->size - private->hook_entry[hook],
@@ -329,6 +329,12 @@ static void trace_packet(struct sk_buff *skb,
}
#endif
+static inline __pure struct ip6t_entry *
+ip6t_next_entry(const struct ip6t_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ip6t_do_table(struct sk_buff *skb,
@@ -337,6 +343,8 @@ ip6t_do_table(struct sk_buff *skb,
const struct net_device *out,
struct xt_table *table)
{
+#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
+
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -361,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
mtpar.in = tgpar.in = in;
mtpar.out = tgpar.out = out;
mtpar.family = tgpar.family = NFPROTO_IPV6;
- tgpar.hooknum = hook;
+ mtpar.hooknum = tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -375,96 +383,86 @@ ip6t_do_table(struct sk_buff *skb,
back = get_entry(table_base, private->underflow[hook]);
do {
+ struct ip6t_entry_target *t;
+
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
- &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
- struct ip6t_entry_target *t;
-
- if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
- goto no_match;
+ if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
+ &mtpar.thoff, &mtpar.fragoff, &hotdrop) ||
+ IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+ e = ip6t_next_entry(e);
+ continue;
+ }
- ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(skb)->payload_len) +
- sizeof(struct ipv6hdr), 1);
+ ADD_COUNTER(e->counters,
+ ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr), 1);
- t = ip6t_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ t = ip6t_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- /* The packet is traced: log it */
- if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, in, out,
- table->name, private, e);
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
#endif
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct ip6t_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != IP6T_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v != (void *)e + e->next_offset
- && !(e->ipv6.flags & IP6T_F_GOTO)) {
- /* Save old back ptr in next entry */
- struct ip6t_entry *next
- = (void *)e + e->next_offset;
- next->comefrom
- = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ip6t_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IP6T_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
}
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
+ }
+ if (table_base + v != ip6t_next_entry(e)
+ && !(e->ipv6.flags & IP6T_F_GOTO)) {
+ /* Save old back ptr in next entry */
+ struct ip6t_entry *next = ip6t_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- abs. verdicts */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
+ e = get_entry(table_base, v);
+ continue;
+ }
-#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ip6t_entry *)table_base)->comefrom
- = 0xeeeeeeec;
-#endif
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
+ /* Targets which reenter must return
+ abs. verdicts */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
- if (((struct ip6t_entry *)table_base)->comefrom
- != 0xeeeeeeec
- && verdict == IP6T_CONTINUE) {
- printk("Target %s reentered!\n",
- t->u.kernel.target->name);
- verdict = NF_DROP;
- }
- ((struct ip6t_entry *)table_base)->comefrom
- = 0x57acc001;
+ tb_comefrom = 0xeeeeeeec;
#endif
- if (verdict == IP6T_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
- break;
- }
- } else {
+ verdict = t->u.kernel.target->target(skb, &tgpar);
- no_match:
- e = (void *)e + e->next_offset;
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
}
+ tb_comefrom = 0x57acc001;
+#endif
+ if (verdict == IP6T_CONTINUE)
+ e = ip6t_next_entry(e);
+ else
+ /* Verdict */
+ break;
} while (!hotdrop);
#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
+ tb_comefrom = NETFILTER_LINK_POISON;
#endif
xt_info_rdunlock_bh();
@@ -475,6 +473,8 @@ ip6t_do_table(struct sk_buff *skb,
return NF_DROP;
else return verdict;
#endif
+
+#undef tb_comefrom
}
/* Figures out from what hook each rule can be called: returns 0 if
@@ -2191,7 +2191,7 @@ static bool icmp6_checkentry(const struct xt_mtchk_param *par)
static struct xt_target ip6t_standard_target __read_mostly = {
.name = IP6T_STANDARD_TARGET,
.targetsize = sizeof(int),
- .family = AF_INET6,
+ .family = NFPROTO_IPV6,
#ifdef CONFIG_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
@@ -2203,7 +2203,7 @@ static struct xt_target ip6t_error_target __read_mostly = {
.name = IP6T_ERROR_TARGET,
.target = ip6t_error,
.targetsize = IP6T_FUNCTION_MAXNAMELEN,
- .family = AF_INET6,
+ .family = NFPROTO_IPV6,
};
static struct nf_sockopt_ops ip6t_sockopts = {
@@ -2229,17 +2229,17 @@ static struct xt_match icmp6_matchstruct __read_mostly = {
.matchsize = sizeof(struct ip6t_icmp),
.checkentry = icmp6_checkentry,
.proto = IPPROTO_ICMPV6,
- .family = AF_INET6,
+ .family = NFPROTO_IPV6,
};
static int __net_init ip6_tables_net_init(struct net *net)
{
- return xt_proto_init(net, AF_INET6);
+ return xt_proto_init(net, NFPROTO_IPV6);
}
static void __net_exit ip6_tables_net_exit(struct net *net)
{
- xt_proto_fini(net, AF_INET6);
+ xt_proto_fini(net, NFPROTO_IPV6);
}
static struct pernet_operations ip6_tables_net_ops = {
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9903227bf37c..642dcb127bab 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -95,18 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct,
u_int8_t pf,
unsigned int hooknum)
{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count))
- nf_ct_kill_acct(ct, ctinfo, skb);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
- }
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
return NF_ACCEPT;
}
@@ -132,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
type + 128);
return false;
}
- atomic_set(&ct->proto.icmp.count, 0);
return true;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cb3ad741ebf8..79ba47f042c0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32
Details and examples are in the kernel module source.
+config NETFILTER_XT_MATCH_OSF
+ tristate '"osf" Passive OS fingerprint match'
+ depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+ help
+ This option selects the Passive OS Fingerprinting match module
+ that allows to passively match the remote operating system by
+ analyzing incoming TCP SYN packets.
+
+ Rules and loading software can be downloaded from
+ http://www.ioremap.net/projects/osf
+
+ To compile it as a module, choose M here. If unsure, say N.
+
endif # NETFILTER_XTABLES
endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6282060fbda9..49f62ee4e9ff 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8020db6274b8..edf95695e0aa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
-#ifdef CONFIG_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, ct);
-#endif
+
nf_conntrack_event_cache(master_ct(ct) ?
IPCT_RELATED : IPCT_NEW, ct);
return NF_ACCEPT;
@@ -523,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
return ERR_PTR(-ENOMEM);
}
+ spin_lock_init(&ct->lock);
atomic_set(&ct->ct_general.use, 1);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
@@ -807,8 +804,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- int event = 0;
-
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
@@ -821,7 +816,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
/* If not in hash table, timer will not be active yet */
if (!nf_ct_is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
} else {
unsigned long newtime = jiffies + extra_jiffies;
@@ -832,7 +826,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
&& del_timer(&ct->timeout)) {
ct->timeout.expires = newtime;
add_timer(&ct->timeout);
- event = IPCT_REFRESH;
}
}
@@ -849,10 +842,6 @@ acct:
}
spin_unlock_bh(&nf_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- nf_conntrack_event_cache(event, ct);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
@@ -1001,7 +990,7 @@ struct __nf_ct_flush_report {
int report;
};
-static int kill_all(struct nf_conn *i, void *data)
+static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
@@ -1013,6 +1002,11 @@ static int kill_all(struct nf_conn *i, void *data)
return 1;
}
+static int kill_all(struct nf_conn *i, void *data)
+{
+ return 1;
+}
+
void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
{
if (vmalloced)
@@ -1023,15 +1017,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-void nf_conntrack_flush(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
{
struct __nf_ct_flush_report fr = {
.pid = pid,
.report = report,
};
- nf_ct_iterate_cleanup(net, kill_all, &fr);
+ nf_ct_iterate_cleanup(net, kill_report, &fr);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_flush);
+EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
static void nf_conntrack_cleanup_init_net(void)
{
@@ -1045,7 +1039,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_event_cache_flush(net);
nf_conntrack_ecache_fini(net);
i_see_dead_people:
- nf_conntrack_flush(net, 0, 0);
+ nf_ct_iterate_cleanup(net, kill_all, NULL);
if (atomic_read(&net->ct.count) != 0) {
schedule();
goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190209cc..5516b3e64b43 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,24 +16,32 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
-#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
+ struct nf_ct_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
&& ecache->events) {
struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
.report = 0
};
- atomic_notifier_call_chain(&nf_conntrack_chain,
- ecache->events,
- &item);
+ notify->fcn(ecache->events, &item);
}
ecache->events = 0;
nf_ct_put(ecache->ct);
ecache->ct = NULL;
+
+out_unlock:
+ rcu_read_unlock();
}
/* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
free_percpu(net->ct.ecache);
}
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+ int ret = 0;
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_conntrack_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+ int ret = 0;
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_expect_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_expect_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 00fecc385f9b..5509dd1f14cf 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
} else if (oldest != NUM_SEQ_TO_REMEMBER &&
after(nl_seq, info->seq_aft_nl[dir][oldest])) {
info->seq_aft_nl[dir][oldest] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
}
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c523f0b8cee5..4e503ada5728 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,7 +27,6 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
-#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -144,7 +143,7 @@ nla_put_failure:
}
static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
@@ -346,23 +345,21 @@ nla_put_failure:
return -1;
}
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, int nowait,
- const struct nf_conn *ct)
+ int event, struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- unsigned char *b = skb_tail_pointer(skb);
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
event |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = nf_ct_l3num(ct);
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -395,104 +392,81 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
goto nla_put_failure;
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ nlmsg_end(skb, nlh);
return skb->len;
nlmsg_failure:
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_cancel(skb, nlh);
return -1;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-/*
- * The general structure of a ctnetlink event is
- *
- * CTA_TUPLE_ORIG
- * <l3/l4-proto-attributes>
- * CTA_TUPLE_REPLY
- * <l3/l4-proto-attributes>
- * CTA_ID
- * ...
- * CTA_PROTOINFO
- * <l4-proto-attributes>
- * CTA_TUPLE_MASTER
- * <l3/l4-proto-attributes>
- *
- * Therefore the formular is
- *
- * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas)
- * + sizeof(protoinfo_nlas)
- */
-static struct sk_buff *
-ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp)
+static inline size_t
+ctnetlink_proto_size(const struct nf_conn *ct)
{
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
- int len;
-
-#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type))
-
- /* proto independant part */
- len = NLMSG_SPACE(sizeof(struct nfgenmsg))
- + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
- + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
- + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
- + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */
+ size_t len = 0;
+
+ rcu_read_lock();
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ len += l3proto->nla_size;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ len += l4proto->nla_size;
+ rcu_read_unlock();
+
+ return len;
+}
+
+static inline size_t
+ctnetlink_nlmsg_size(const struct nf_conn *ct)
+{
+ return NLMSG_ALIGN(sizeof(struct nfgenmsg))
+ + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+ + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
#ifdef CONFIG_NF_CT_ACCT
- + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
- + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */
- + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */
+ + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
#endif
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */
- + nla_total_size(0) /* CTA_PROTOINFO */
- + nla_total_size(0) /* CTA_HELP */
- + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ + nla_total_size(0) /* CTA_PROTOINFO */
+ + nla_total_size(0) /* CTA_HELP */
+ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */
#endif
#ifdef CONFIG_NF_NAT_NEEDED
- + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
+ + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
#endif
#ifdef CONFIG_NF_CONNTRACK_MARK
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
- ;
-
-#undef NLA_TYPE_SIZE
-
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
- len += l3proto->nla_size;
-
- l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
- len += l4proto->nla_size;
- rcu_read_unlock();
-
- return alloc_skb(len, gfp);
+ + ctnetlink_proto_size(ct)
+ ;
}
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_ct_event *item = (struct nf_ct_event *)ptr;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
- sk_buff_data_t b;
unsigned int flags = 0, group;
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
- return NOTIFY_DONE;
+ return 0;
if (events & IPCT_DESTROY) {
type = IPCTNL_MSG_CT_DELETE;
@@ -501,26 +475,25 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
+ } else if (events) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report && !nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
+ return 0;
- skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
- if (!skb)
+ skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
+ if (skb == NULL)
goto errout;
- b = skb->tail;
-
type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = flags;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = nf_ct_l3num(ct);
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -529,14 +502,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -584,17 +557,18 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
#endif
rcu_read_unlock();
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, item->pid, group, item->report);
- return NOTIFY_DONE;
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+ return 0;
nla_put_failure:
rcu_read_unlock();
+ nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, group, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -611,7 +585,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
rcu_read_lock();
@@ -637,8 +611,7 @@ restart:
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0) {
+ IPCTNL_MSG_CT_NEW, ct) < 0) {
cb->args[1] = (unsigned long)ct;
goto out;
}
@@ -792,7 +765,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -802,9 +775,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
else {
/* Flush the whole table */
- nf_conntrack_flush(&init_net,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ nf_conntrack_flush_report(&init_net,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
return 0;
}
@@ -847,7 +820,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct sk_buff *skb2 = NULL;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -872,15 +845,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
ct = nf_ct_tuplehash_to_ctrack(h);
err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2) {
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
nf_ct_put(ct);
return -ENOMEM;
}
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, 1, ct);
+ IPCTNL_MSG_CT_NEW, ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
@@ -1325,7 +1298,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
{
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1503,19 +1476,18 @@ nla_put_failure:
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event,
- int nowait,
- const struct nf_conntrack_expect *exp)
+ int event, const struct nf_conntrack_expect *exp)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned char *b = skb_tail_pointer(skb);
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = exp->tuple.src.l3num;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -1523,49 +1495,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ nlmsg_end(skb, nlh);
return skb->len;
nlmsg_failure:
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_cancel(skb, nlh);
return -1;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_exp_event *item = (struct nf_exp_event *)ptr;
struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
- sk_buff_data_t b;
int flags = 0;
if (events & IPEXP_NEW) {
type = IPCTNL_MSG_EXP_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report &&
!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
+ return 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
goto errout;
- b = skb->tail;
-
type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = flags;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = exp->tuple.src.l3num;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -1575,17 +1544,19 @@ static int ctnetlink_expect_event(struct notifier_block *this,
goto nla_put_failure;
rcu_read_unlock();
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
- return NOTIFY_DONE;
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+ item->report, GFP_ATOMIC);
+ return 0;
nla_put_failure:
rcu_read_unlock();
+ nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, 0, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif
static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1600,7 +1571,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = &init_net;
struct nf_conntrack_expect *exp, *last;
- struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct hlist_node *n;
u_int8_t l3proto = nfmsg->nfgen_family;
@@ -1617,10 +1588,11 @@ restart:
continue;
cb->args[1] = 0;
}
- if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (ctnetlink_exp_fill_info(skb,
+ NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
- 1, exp) < 0) {
+ exp) < 0) {
if (!atomic_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
@@ -1652,7 +1624,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct sk_buff *skb2;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1683,14 +1655,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
}
err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2)
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
goto out;
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
- 1, exp);
+ nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
rcu_read_unlock();
if (err <= 0)
goto free;
@@ -1713,7 +1684,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_helper *h;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct hlist_node *n, *next;
u_int8_t u3 = nfmsg->nfgen_family;
unsigned int i;
@@ -1854,7 +1825,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1891,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+ .fcn = ctnetlink_conntrack_event,
};
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+ .fcn = ctnetlink_expect_event,
};
#endif
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index aee0d6bea309..1b816a2ea813 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -25,8 +25,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_log.h>
-static DEFINE_RWLOCK(dccp_lock);
-
/* Timeouts are based on values from RFC4340:
*
* - REQUEST:
@@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
- write_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
role = ct->proto.dccp.role[dir];
old_state = ct->proto.dccp.state;
@@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
@@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;
ct->proto.dccp.state = new_state;
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s,
ntohs(tuple->dst.u.dccp.port));
}
-static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
- read_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
+ NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
+ cpu_to_be64(ct->proto.dccp.handshake_seq));
nla_nest_end(skb, nest_parms);
- read_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
nla_put_failure:
- read_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
[CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
[CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
};
static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
return -EINVAL;
}
- write_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
@@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
}
- write_unlock_bh(&dccp_lock);
+ if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
+ ct->proto.dccp.handshake_seq =
+ be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
+ }
+ spin_unlock_bh(&ct->lock);
return 0;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a6d6ec320fbc..a54a0af0edba 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
}
/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
- const struct nf_conn *ct)
+static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "timeout=%u, stream_timeout=%u ",
(ct->proto.gre.timeout / HZ),
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 101b4ad9e817..c10e6f36e31e 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,9 +25,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
-/* Protects ct->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
@@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
}
/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum sctp_conntrack state;
- read_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
state = ct->proto.sctp.state;
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
@@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
}
old_state = new_state = SCTP_CONNTRACK_NONE;
- write_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
@@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
if (old_state != new_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
}
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
@@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
return NF_ACCEPT;
out_unlock:
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
out:
return -NF_ACCEPT;
}
@@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
#include <linux/netfilter/nfnetlink_conntrack.h>
static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
- read_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
CTA_PROTOINFO_SCTP_VTAG_REPLY,
ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
nla_nest_end(skb, nest_parms);
return 0;
nla_put_failure:
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
@@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
!tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
return -EINVAL;
- write_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 97a6e93d742e..33fc0a443f3d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,9 +29,6 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-/* Protects ct->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = {
"LAST_ACK",
"TIME_WAIT",
"CLOSE",
- "LISTEN"
+ "SYN_SENT2",
};
#define SECS * HZ
@@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
[TCP_CONNTRACK_LAST_ACK] = 30 SECS,
[TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
[TCP_CONNTRACK_CLOSE] = 10 SECS,
+ [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
};
#define sNO TCP_CONNTRACK_NONE
@@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
+#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
@@ -123,6 +121,7 @@ enum tcp_bit_set {
*
* NONE: initial state
* SYN_SENT: SYN-only packet seen
+ * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
* SYN_RECV: SYN-ACK packet seen
* ESTABLISHED: ACK packet seen
* FIN_WAIT: FIN packet seen
@@ -131,26 +130,24 @@ enum tcp_bit_set {
* TIME_WAIT: last ACK seen
* CLOSE: closed connection (RST)
*
- * LISTEN state is not used.
- *
* Packets marked as IGNORED (sIG):
* if they may be either invalid or valid
* and the receiver may send back a connection
* closing RST or a SYN/ACK.
*
* Packets marked as INVALID (sIV):
- * if they are invalid
- * or we do not support the request (simultaneous open)
+ * if we regard them as truly invalid packets
*/
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
* sNO -> sSS Initialize a new connection
* sSS -> sSS Retransmitted SYN
- * sSR -> sIG Late retransmitted SYN?
+ * sS2 -> sS2 Late retransmitted SYN
+ * sSR -> sIG
* sES -> sIG Error: SYNs in window outside the SYN_SENT state
* are errors. Receiver will reply with RST
* and close the connection.
@@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sSS Reopened connection (RFC 1122).
* sCL -> sSS
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
- * A SYN/ACK from the client is always invalid:
- * - either it tries to set up a simultaneous open, which is
- * not supported;
- * - or the firewall has just been inserted between the two hosts
- * during the session set-up. The SYN will be retransmitted
- * by the true client (or it'll time out).
+ * sNO -> sIV Too late and no reason to do anything
+ * sSS -> sIV Client can't send SYN and then SYN/ACK
+ * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
+ * sSR -> sIG
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sNO -> sIV Too late and no reason to do anything...
* sSS -> sIV Client migth not send FIN in this state:
* we enforce waiting for a SYN/ACK reply first.
+ * sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions, waiting for
@@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
* sNO -> sES Assumed.
* sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sS2 -> sIV
* sSR -> sES Established state is reached.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW Retransmitted last ACK. Remain in the same state.
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
/* REPLY */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
/*
* sNO -> sIV Never reached.
- * sSS -> sIV Simultaneous open, not supported
- * sSR -> sIV Simultaneous open, not supported.
- * sES -> sIV Server may not initiate a connection.
+ * sSS -> sS2 Simultaneous open
+ * sS2 -> sS2 Retransmitted simultaneous SYN
+ * sSR -> sIV Invalid SYN packets sent by the server
+ * sES -> sIV
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
* sTW -> sIV Reopened connection, but server may not do it.
* sCL -> sIV
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
* sSS -> sSR Standard open.
+ * sS2 -> sSR Simultaneous open
* sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK?
* sFW -> sIG Might be SYN/ACK answering ignored SYN
@@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sIG
* sCL -> sIG
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sSS -> sIV Server might not send FIN in this state.
+ * sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions.
@@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
* sSS -> sIG Might be a half-open connection.
+ * sS2 -> sIG
* sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW Retransmitted last ACK.
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
};
@@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s,
}
/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum tcp_conntrack state;
- read_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
state = ct->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
@@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
- if (sender->td_end == 0) {
+ if (sender->td_maxwin == 0) {
/*
* Initialize sender data.
*/
- if (tcph->syn && tcph->ack) {
+ if (tcph->syn) {
/*
- * Outgoing SYN-ACK in reply to a SYN.
+ * SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
*/
sender->td_end =
sender->td_maxend = end;
@@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
&& receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
sender->td_scale =
receiver->td_scale = 0;
+ if (!tcph->ack)
+ /* Simultaneous open */
+ return true;
} else {
/*
* We are in the middle of a connection,
@@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
/*
* We have to worry for the ack in the reply packet only...
*/
if (after(end, ct->proto.tcp.seen[dir].td_end))
ct->proto.tcp.seen[dir].td_end = end;
ct->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct,
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
old_state = ct->proto.tcp.state;
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
@@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
&& ct->proto.tcp.last_index == TCP_RST_SET)) {
/* Attempt to reopen a closed/aborted connection.
* Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
/* Only repeat if we can actually remove the timer.
* Destruction may already be in progress in process
@@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct,
* that the client cannot but retransmit its SYN and
* thus initiate a clean new session.
*/
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: killing out of sync session ");
@@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored ");
@@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Invalid packet */
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
@@ -930,7 +944,7 @@ static int tcp_packet(struct nf_conn *ct,
&& (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
&& before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid RST ");
@@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct,
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
skb, dataoff, th, pf)) {
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
in_window:
@@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct,
timeout = nf_ct_tcp_timeout_unacknowledged;
else
timeout = tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[1].td_end = 0;
ct->proto.tcp.seen[1].td_maxend = 0;
- ct->proto.tcp.seen[1].td_maxwin = 1;
+ ct->proto.tcp.seen[1].td_maxwin = 0;
ct->proto.tcp.seen[1].td_scale = 0;
/* tcp_packet will set them */
@@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
#include <linux/netfilter/nfnetlink_conntrack.h>
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
struct nf_ct_tcp_flags tmp = {};
- read_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
tmp.flags = ct->proto.tcp.seen[1].flags;
NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
sizeof(struct nf_ct_tcp_flags), &tmp);
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
nla_nest_end(skb, nest_parms);
return 0;
nla_put_failure:
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
@@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
return -EINVAL;
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
@@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.tcp.seen[1].td_scale =
nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
}
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
}
@@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "ip_conntrack_tcp_timeout_syn_sent2",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "ip_conntrack_tcp_timeout_syn_recv",
.data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
.maxlen = sizeof(unsigned int),
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4f2310c93e01..3a6fd77f7761 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb,
queuenum);
switch (pf) {
- case AF_INET:
+ case NFPROTO_IPV4:
skb->protocol = htons(ETH_P_IP);
break;
- case AF_INET6:
+ case NFPROTO_IPV6:
skb->protocol = htons(ETH_P_IPV6);
break;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b8ab37ad7ed5..92761a988375 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+ unsigned group, int echo, gfp_t flags)
{
- return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+ return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
@@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
return 0;
type = nlh->nlmsg_type;
@@ -160,19 +161,14 @@ replay:
{
int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
- u_int16_t attr_count = ss->cb[cb_id].attr_count;
- struct nlattr *cda[attr_count+1];
-
- if (likely(nlh->nlmsg_len >= min_len)) {
- struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
- int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
- err = nla_parse(cda, attr_count, attr, attrlen,
- ss->cb[cb_id].policy);
- if (err < 0)
- return err;
- } else
- return -EINVAL;
+ struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ int attrlen = nlh->nlmsg_len - min_len;
+
+ err = nla_parse(cda, ss->cb[cb_id].attr_count,
+ attr, attrlen, ss->cb[cb_id].policy);
+ if (err < 0)
+ return err;
err = nc->call(nfnl, skb, nlh, cda);
if (err == -EAGAIN)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 150e5cf62f85..46dba5f043d5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
}
EXPORT_SYMBOL_GPL(xt_find_revision);
+/*
+ * Render the hooks selected in @mask as a '/'-separated list of names
+ * (e.g. "PREROUTING/INPUT") into @buf, which holds @size bytes.
+ *
+ * Bit i of @mask selects names[i]; bits beyond the table are ignored.
+ * Output that does not fit is truncated but stays NUL-terminated.
+ * Returns @buf so the call can be used directly as a printk argument.
+ */
+static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+{
+	static const char *const names[] = {
+		"PREROUTING", "INPUT", "FORWARD",
+		"OUTPUT", "POSTROUTING", "BROUTING",
+	};
+	unsigned int i;
+	char *p = buf;
+	bool np = false;
+	int res;
+
+	/* Nothing can be stored, not even the terminator. */
+	if (size == 0)
+		return buf;
+
+	*p = '\0';
+	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+		if (!(mask & (1 << i)))
+			continue;
+		res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
+		/*
+		 * snprintf() reports the length it wanted to write, which
+		 * may exceed what actually fit.  Stop on truncation instead
+		 * of letting "size" wrap around and "p" leave the buffer.
+		 */
+		if (res < 0 || (size_t)res >= size)
+			break;
+		size -= res;
+		p += res;
+		np = true;
+	}
+
+	return buf;
+}
+
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -351,9 +377,13 @@ int xt_check_match(struct xt_mtchk_param *par,
return -EINVAL;
}
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
- printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
+ char used[64], allow[64];
+
+ printk("%s_tables: %s match: used from hooks %s, but only "
+ "valid from %s\n",
xt_prefix[par->family], par->match->name,
- par->hook_mask, par->match->hooks);
+ textify_hooks(used, sizeof(used), par->hook_mask),
+ textify_hooks(allow, sizeof(allow), par->match->hooks));
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -497,9 +527,13 @@ int xt_check_target(struct xt_tgchk_param *par,
return -EINVAL;
}
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
- printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
+ char used[64], allow[64];
+
+ printk("%s_tables: %s target: used from hooks %s, but only "
+ "usable from %s\n",
xt_prefix[par->family], par->target->name,
- par->hook_mask, par->target->hooks);
+ textify_hooks(used, sizeof(used), par->hook_mask),
+ textify_hooks(allow, sizeof(allow), par->target->hooks));
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f9977b3311f7..498b45101df7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -11,6 +11,10 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
#include <linux/netfilter.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
@@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE");
MODULE_ALIAS("ip6t_NFQUEUE");
MODULE_ALIAS("arpt_NFQUEUE");
+static u32 jhash_initval __read_mostly;
+
static unsigned int
nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
{
@@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_QUEUE_NR(tinfo->queuenum);
}
+/*
+ * Hash an IPv4 packet for queue selection.  The source and destination
+ * addresses are XORed before hashing, so both directions of a flow
+ * yield the same value.
+ */
+static u32 hash_v4(const struct sk_buff *skb)
+{
+	const struct iphdr *hdr = ip_hdr(skb);
+
+	return jhash_2words(hdr->saddr ^ hdr->daddr, hdr->protocol,
+			    jhash_initval);
+}
+
+/*
+ * Revision 1 target for IPv4: pick a queue in the range
+ * [queuenum, queuenum + queues_total - 1] based on the packet hash.
+ */
+static unsigned int
+nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 nr = cfg->queuenum;
+
+	/* A single queue leaves nothing to balance. */
+	if (cfg->queues_total <= 1)
+		return NF_QUEUE_NR(nr);
+
+	nr += hash_v4(skb) % cfg->queues_total;
+	return NF_QUEUE_NR(nr);
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * Hash an IPv6 packet for queue selection.  Each 32-bit word of the
+ * source address is XORed with the matching word of the destination,
+ * so both directions of a flow yield the same value.
+ */
+static u32 hash_v6(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	u32 mixed[4];
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(mixed); i++)
+		mixed[i] = hdr->saddr.s6_addr32[i] ^ hdr->daddr.s6_addr32[i];
+
+	return jhash2(mixed, ARRAY_SIZE(mixed), jhash_initval);
+}
+
+/*
+ * Revision 1 target for IPv6: pick a queue in the range
+ * [queuenum, queuenum + queues_total - 1] based on the packet hash.
+ */
+static unsigned int
+nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 nr = cfg->queuenum;
+
+	/* A single queue leaves nothing to balance. */
+	if (cfg->queues_total <= 1)
+		return NF_QUEUE_NR(nr);
+
+	nr += hash_v6(skb) % cfg->queues_total;
+	return NF_QUEUE_NR(nr);
+}
+#endif
+
+/*
+ * checkentry hook for the revision 1 target: reject a rule whose queue
+ * count is zero or whose highest selectable queue number,
+ * queuenum + queues_total - 1, exceeds 0xffff.
+ */
+static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 last;
+
+	if (!cfg->queues_total) {
+		pr_err("NFQUEUE: number of total queues is 0\n");
+		return false;
+	}
+
+	last = cfg->queues_total - 1 + cfg->queuenum;
+	if (last <= 0xffff)
+		return true;
+
+	pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
+	       cfg->queues_total, last);
+	return false;
+}
+
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
{
.name = "NFQUEUE",
- .family = NFPROTO_IPV4,
+ .family = NFPROTO_UNSPEC,
.target = nfqueue_tg,
.targetsize = sizeof(struct xt_NFQ_info),
.me = THIS_MODULE,
},
{
.name = "NFQUEUE",
- .family = NFPROTO_IPV6,
- .target = nfqueue_tg,
- .targetsize = sizeof(struct xt_NFQ_info),
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .checkentry = nfqueue_tg_v1_check,
+ .target = nfqueue_tg4_v1,
+ .targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
{
.name = "NFQUEUE",
- .family = NFPROTO_ARP,
- .target = nfqueue_tg,
- .targetsize = sizeof(struct xt_NFQ_info),
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .checkentry = nfqueue_tg_v1_check,
+ .target = nfqueue_tg6_v1,
+ .targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+#endif
};
static int __init nfqueue_tg_init(void)
{
+ get_random_bytes(&jhash_initval, sizeof(jhash_initval));
return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644
index 000000000000..863e40977a4d
--- /dev/null
+++ b/net/netfilter/xt_osf.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/xt_osf.h>
+
+struct xt_osf_finger {
+ struct rcu_head rcu_head;
+ struct list_head finger_entry;
+ struct xt_osf_user_finger finger;
+};
+
+enum osf_fmatch_states {
+ /* Packet does not match the fingerprint */
+ FMATCH_WRONG = 0,
+ /* Packet matches the fingerprint */
+ FMATCH_OK,
+ /* Options do not match the fingerprint, but header does */
+ FMATCH_OPT_WRONG,
+};
+
+/*
+ * Indexed by dont-fragment bit.
+ * It is the only constant value in the fingerprint.
+ */
+static struct list_head xt_osf_fingers[2];
+
+static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
+ [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
+};
+
+/* RCU callback: free the fingerprint entry that embeds @rcu_head. */
+static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
+{
+	kfree(container_of(rcu_head, struct xt_osf_finger, rcu_head));
+}
+
+/*
+ * nfnetlink handler for OSF_MSG_ADD: store the fingerprint carried in
+ * OSF_ATTR_FINGER on the list selected by its dont-fragment bit.
+ *
+ * Returns 0 on success or when an identical fingerprint is already
+ * stored, -EEXIST for a duplicate when NLM_F_EXCL is set, -EINVAL when
+ * the attribute or NLM_F_CREATE is missing, and -ENOMEM when the
+ * allocation fails.
+ */
+static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
+		struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct xt_osf_user_finger *f;
+	struct xt_osf_finger *fresh, *cur;
+	struct list_head *head;
+
+	if (!osf_attrs[OSF_ATTR_FINGER] || !(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	fresh = kmalloc(sizeof(*fresh), GFP_KERNEL);
+	if (!fresh)
+		return -ENOMEM;
+
+	memcpy(&fresh->finger, f, sizeof(fresh->finger));
+
+	head = &xt_osf_fingers[!!f->df];
+	list_for_each_entry(cur, head, finger_entry) {
+		if (memcmp(&cur->finger, f, sizeof(*f)) == 0) {
+			/* Already stored: drop the copy made above. */
+			kfree(fresh);
+			return (nlh->nlmsg_flags & NLM_F_EXCL) ? -EEXIST : 0;
+		}
+	}
+
+	/*
+	 * We are protected by nfnl mutex.
+	 */
+	list_add_tail_rcu(&fresh->finger_entry, head);
+
+	return 0;
+}
+
+/*
+ * nfnetlink handler for OSF_MSG_REMOVE: unlink the first stored
+ * fingerprint that is byte-identical to the one in OSF_ATTR_FINGER and
+ * free it through call_rcu().
+ *
+ * Returns 0 when an entry was removed, -EINVAL when the attribute is
+ * missing and -ENOENT when no stored fingerprint matches.
+ */
+static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+		struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct xt_osf_user_finger *f;
+	struct xt_osf_finger *sf;
+	/* Errors are negative errno values; a positive ENOENT is not one. */
+	int err = -ENOENT;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+			continue;
+
+		/*
+		 * We are protected by nfnl mutex.
+		 */
+		list_del_rcu(&sf->finger_entry);
+		call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
+
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = {
+ [OSF_MSG_ADD] = {
+ .call = xt_osf_add_callback,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = xt_osf_policy,
+ },
+ [OSF_MSG_REMOVE] = {
+ .call = xt_osf_remove_callback,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = xt_osf_policy,
+ },
+};
+
+static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
+ .name = "osf",
+ .subsys_id = NFNL_SUBSYS_OSF,
+ .cb_count = OSF_MSG_MAX,
+ .cb = xt_osf_nfnetlink_callbacks,
+};
+
+/*
+ * Compare the packet's TTL with the fingerprint's TTL @f_ttl.
+ * Returns non-zero when the TTL is acceptable for this fingerprint.
+ *
+ * Without XT_OSF_TTL in info->flags, or with info->ttl set to
+ * XT_OSF_TTL_TRUE, only an exact match is accepted.
+ * XT_OSF_TTL_NOCHECK accepts every packet.  Any other mode accepts a
+ * TTL not above @f_ttl and otherwise consults the addresses configured
+ * on skb->dev.
+ *
+ * The device lookup uses __in_dev_get_rcu(); the caller in this file
+ * invokes this helper inside rcu_read_lock().
+ */
+static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
+			    unsigned char f_ttl)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+
+	if (info->flags & XT_OSF_TTL) {
+		if (info->ttl == XT_OSF_TTL_TRUE)
+			return ip->ttl == f_ttl;
+		if (info->ttl == XT_OSF_TTL_NOCHECK)
+			return 1;
+		else if (ip->ttl <= f_ttl)
+			return 1;
+		else {
+			struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+			int ret = 0;
+
+			/*
+			 * A device without IPv4 configuration has no
+			 * in_device; treat it like an empty address list
+			 * instead of dereferencing NULL in for_ifa().
+			 */
+			if (!in_dev)
+				return 0;
+
+			/*
+			 * NOTE(review): this branch is only reached with
+			 * ip->ttl > f_ttl, so the equality below cannot
+			 * hold - confirm the intended test.
+			 */
+			for_ifa(in_dev) {
+				if (inet_ifa_match(ip->saddr, ifa)) {
+					ret = (ip->ttl == f_ttl);
+					break;
+				}
+			}
+			endfor_ifa(in_dev);
+
+			return ret;
+		}
+	}
+
+	return ip->ttl == f_ttl;
+}
+
+/*
+ * Match function of the "osf" match: compare a TCP SYN packet against
+ * the stored fingerprints for its dont-fragment bit.
+ *
+ * A fingerprint matches when the IP total length, the TTL (as judged by
+ * xt_osf_ttl()), the sequence of TCP option kinds and lengths, and the
+ * window size rule all agree.  Unless XT_OSF_LOG is set, only
+ * fingerprints whose genre equals info->genre are considered.  With
+ * XT_OSF_LOG every match is logged, or only the first one when
+ * info->loglevel is XT_OSF_LOGLEVEL_FIRST, and a packet with no match
+ * is logged as unknown.
+ *
+ * Returns true when at least one fingerprint matched.  Returns false
+ * without logging when the TCP header or the announced TCP options
+ * cannot be read from the skb, or when the packet is not a SYN.
+ */
+static bool xt_osf_match_packet(const struct sk_buff *skb,
+		const struct xt_match_param *p)
+{
+	const struct xt_osf_info *info = p->matchinfo;
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+	int fmatch = FMATCH_WRONG, fcount = 0;
+	unsigned int optsize = 0, check_WSS = 0;
+	u16 window, totlen, mss = 0;
+	bool df;
+	const unsigned char *optp = NULL, *_optp = NULL;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct xt_osf_finger *kf;
+	const struct xt_osf_user_finger *f;
+
+	if (!info)
+		return false;
+
+	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+	if (!tcp)
+		return false;
+
+	if (!tcp->syn)
+		return false;
+
+	totlen = ntohs(ip->tot_len);
+	df = ntohs(ip->frag_off) & IP_DF;
+	window = ntohs(tcp->window);
+
+	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+		optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+		_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+				sizeof(struct tcphdr), optsize, opts);
+		/*
+		 * The header announces options the skb does not hold.
+		 * The option walk below dereferences this pointer, so
+		 * give up on the packet instead.
+		 */
+		if (!_optp)
+			return false;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
+		int foptsize, optnum;
+
+		f = &kf->finger;
+
+		if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
+			continue;
+
+		/* Every fingerprint starts again at the first option. */
+		optp = _optp;
+		fmatch = FMATCH_WRONG;
+
+		if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl))
+			continue;
+
+		/*
+		 * Should not happen if userspace parser was written correctly.
+		 */
+		if (f->wss.wc >= OSF_WSS_MAX)
+			continue;
+
+		/* Check options */
+
+		foptsize = 0;
+		for (optnum = 0; optnum < f->opt_num; ++optnum)
+			foptsize += f->opt[optnum].length;
+
+		if (foptsize > MAX_IPOPTLEN ||
+		    optsize > MAX_IPOPTLEN ||
+		    optsize != foptsize)
+			continue;
+
+		check_WSS = f->wss.wc;
+
+		for (optnum = 0; optnum < f->opt_num; ++optnum) {
+			if (f->opt[optnum].kind == (*optp)) {
+				__u32 len = f->opt[optnum].length;
+
+				fmatch = FMATCH_OK;
+
+				/* Remember the MSS for the window rules. */
+				if (*optp == OSFOPT_MSS) {
+					mss = optp[3];
+					mss <<= 8;
+					mss |= optp[2];
+
+					mss = ntohs(mss);
+				}
+
+				/* Advance by the fingerprint's length. */
+				optp += len;
+			} else
+				fmatch = FMATCH_OPT_WRONG;
+
+			if (fmatch != FMATCH_OK)
+				break;
+		}
+
+		if (fmatch != FMATCH_OPT_WRONG) {
+			fmatch = FMATCH_WRONG;
+
+			switch (check_WSS) {
+			case OSF_WSS_PLAIN:
+				if (f->wss.val == 0 || window == f->wss.val)
+					fmatch = FMATCH_OK;
+				break;
+			case OSF_WSS_MSS:
+				/*
+				 * Some smart modems decrease mangle MSS to
+				 * SMART_MSS_2, so we check standard, decreased
+				 * and the one provided in the fingerprint MSS
+				 * values.
+				 */
+#define SMART_MSS_1	1460
+#define SMART_MSS_2	1448
+				if (window == f->wss.val * mss ||
+				    window == f->wss.val * SMART_MSS_1 ||
+				    window == f->wss.val * SMART_MSS_2)
+					fmatch = FMATCH_OK;
+				break;
+			case OSF_WSS_MTU:
+				if (window == f->wss.val * (mss + 40) ||
+				    window == f->wss.val * (SMART_MSS_1 + 40) ||
+				    window == f->wss.val * (SMART_MSS_2 + 40))
+					fmatch = FMATCH_OK;
+				break;
+			case OSF_WSS_MODULO:
+				/*
+				 * wss.val is supplied by userspace; a zero
+				 * modulus can never match and must not be
+				 * used as a divisor.
+				 */
+				if (f->wss.val != 0 &&
+				    (window % f->wss.val) == 0)
+					fmatch = FMATCH_OK;
+				break;
+			}
+		}
+
+		if (fmatch != FMATCH_OK)
+			continue;
+
+		fcount++;
+
+		if (info->flags & XT_OSF_LOG)
+			nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+				"%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+				f->genre, f->version, f->subtype,
+				&ip->saddr, ntohs(tcp->source),
+				&ip->daddr, ntohs(tcp->dest),
+				f->ttl - ip->ttl);
+
+		if ((info->flags & XT_OSF_LOG) &&
+		    info->loglevel == XT_OSF_LOGLEVEL_FIRST)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (!fcount && (info->flags & XT_OSF_LOG))
+		nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+			"Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+				&ip->saddr, ntohs(tcp->source),
+				&ip->daddr, ntohs(tcp->dest));
+
+	if (fcount)
+		fmatch = FMATCH_OK;
+
+	return fmatch == FMATCH_OK;
+}
+
+static struct xt_match xt_osf_match = {
+ .name = "osf",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .proto = IPPROTO_TCP,
+ .hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD),
+ .match = xt_osf_match_packet,
+ .matchsize = sizeof(struct xt_osf_info),
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: set up the two fingerprint lists, then register the
+ * nfnetlink subsystem and the "osf" match.  If the match cannot be
+ * registered the nfnetlink subsystem is unregistered again.
+ *
+ * Returns 0 on success or the error from the failing registration.
+ */
+static int __init xt_osf_init(void)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(xt_osf_fingers); ++i)
+		INIT_LIST_HEAD(&xt_osf_fingers[i]);
+
+	err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
+	if (err < 0) {
+		printk(KERN_ERR "Failed (%d) to register OSF nfnetlink helper.\n", err);
+		goto err_out_exit;
+	}
+
+	err = xt_register_match(&xt_osf_match);
+	if (err) {
+		printk(KERN_ERR "Failed (%d) to register OS fingerprint "
+				"matching module.\n", err);
+		goto err_out_remove;
+	}
+
+	return 0;
+
+err_out_remove:
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+err_out_exit:
+	return err;
+}
+
+/*
+ * Module exit: unregister the nfnetlink subsystem and the match, then
+ * unlink every stored fingerprint and hand it to call_rcu().  The final
+ * rcu_barrier() waits for those callbacks before the function returns.
+ */
+static void __exit xt_osf_fini(void)
+{
+	struct xt_osf_finger *entry;
+	int bucket;
+
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+	xt_unregister_match(&xt_osf_match);
+
+	rcu_read_lock();
+	for (bucket = 0; bucket < ARRAY_SIZE(xt_osf_fingers); ++bucket) {
+		struct list_head *head = &xt_osf_fingers[bucket];
+
+		list_for_each_entry_rcu(entry, head, finger_entry) {
+			list_del_rcu(&entry->finger_entry);
+			call_rcu(&entry->rcu_head, xt_osf_finger_free_rcu);
+		}
+	}
+	rcu_read_unlock();
+
+	rcu_barrier();
+}
+
+module_init(xt_osf_init);
+module_exit(xt_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1acc089be7e9..ebf00ad5b194 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,6 +22,8 @@
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <linux/netfilter/xt_socket.h>
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define XT_SOCKET_HAVE_CONNTRACK 1
#include <net/netfilter/nf_conntrack.h>
@@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb,
static bool
-socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+ const struct xt_socket_mtinfo1 *info)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport, par->in, false);
if (sk != NULL) {
- bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0);
+ bool wildcard;
+ bool transparent = true;
+
+ /* Ignore sockets listening on INADDR_ANY */
+ wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+ inet_sk(sk)->rcv_saddr == 0);
+
+ /* Ignore non-transparent sockets,
+ if XT_SOCKET_TRANSPARENT is used */
+ if (info && info->flags & XT_SOCKET_TRANSPARENT)
+ transparent = ((sk->sk_state != TCP_TIME_WAIT &&
+ inet_sk(sk)->transparent) ||
+ (sk->sk_state == TCP_TIME_WAIT &&
+ inet_twsk(sk)->tw_transparent));
nf_tproxy_put_sock(sk);
- if (wildcard)
+
+ if (wildcard || !transparent)
sk = NULL;
}
@@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
return (sk != NULL);
}
-static struct xt_match socket_mt_reg __read_mostly = {
- .name = "socket",
- .family = AF_INET,
- .match = socket_mt,
- .hooks = 1 << NF_INET_PRE_ROUTING,
- .me = THIS_MODULE,
+/*
+ * Match entry point that takes no match options: socket_match() is
+ * called with a NULL info pointer.
+ */
+static bool
+socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ return socket_match(skb, par, NULL);
+}
+
+/*
+ * Match entry point that forwards the rule's match data
+ * (par->matchinfo) to socket_match() as its info argument.
+ */
+static bool
+socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ return socket_match(skb, par, par->matchinfo);
+}
+
+static struct xt_match socket_mt_reg[] __read_mostly = {
+ {
+ .name = "socket",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt_v0,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "socket",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt_v1,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
};
static int __init socket_mt_init(void)
{
nf_defrag_ipv4_enable();
- return xt_register_match(&socket_mt_reg);
+ return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
}
static void __exit socket_mt_exit(void)
{
- xt_unregister_match(&socket_mt_reg);
+ xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
}
module_init(socket_mt_init);