diff options
author | David S. Miller <davem@davemloft.net> | 2011-12-25 02:21:45 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-12-25 02:21:45 -0500 |
commit | c5e1fd8ccae09f574d6f978c90c2b968ee29030c (patch) | |
tree | e4485dc086ce76c4ff2ff551246255f5de0a250b /net/netfilter | |
parent | 60b778ce519625102d3f72a2071ea72a05e990ce (diff) | |
parent | ceb98d03eac5704820f2ac1f370c9ff385e3a9f5 (diff) | |
download | lwn-c5e1fd8ccae09f574d6f978c90c2b968ee29030c.tar.gz lwn-c5e1fd8ccae09f574d6f978c90c2b968ee29030c.zip |
Merge branch 'nf-next' of git://1984.lsi.us.es/net-next
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/Kconfig | 18 | ||||
-rw-r--r-- | net/netfilter/Makefile | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/Kconfig | 15 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sh.c | 18 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_acct.c | 4 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 15 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_expect.c | 63 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_helper.c | 12 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 82 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_acct.c | 352 | ||||
-rw-r--r-- | net/netfilter/xt_CT.c | 8 | ||||
-rw-r--r-- | net/netfilter/xt_connbytes.c | 32 | ||||
-rw-r--r-- | net/netfilter/xt_nfacct.c | 76 |
13 files changed, 590 insertions, 107 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d5597b759ba3..bac93ba60778 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -4,6 +4,14 @@ menu "Core Netfilter Configuration" config NETFILTER_NETLINK tristate +config NETFILTER_NETLINK_ACCT +tristate "Netfilter NFACCT over NFNETLINK interface" + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for extended accounting via NFNETLINK. + config NETFILTER_NETLINK_QUEUE tristate "Netfilter NFQUEUE over NFNETLINK interface" depends on NETFILTER_ADVANCED @@ -879,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_NFACCT + tristate '"nfacct" match support' + default m if NETFILTER_ADVANCED=n + select NETFILTER_NETLINK_ACCT + help + This option allows you to use the extended accounting through + nfnetlink_acct. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OSF tristate '"osf" Passive OS fingerprint match' depends on NETFILTER_ADVANCED && NETFILTER_NETLINK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1a02853df863..b2eee4df8168 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o @@ -90,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 70bd1d0774c6..af4c0b8c5275 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -232,6 +232,21 @@ config IP_VS_NQ If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +comment 'IPVS SH scheduler' + +config IP_VS_SH_TAB_BITS + int "IPVS source hashing table size (the Nth power of 2)" + range 4 20 + default 8 + ---help--- + The source hashing scheduler maps source IPs to destinations + stored in a hash table. This table is tiled by each destination + until all slots in the table are filled. When using weights to + allow destinations to receive more connections, the table is + tiled an amount proportional to the weights specified. The table + needs to be large enough to effectively fit all the destinations + multiplied by their respective weights. + comment 'IPVS application helper' config IP_VS_FTP diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 33815f4fb451..069e8d4d5c01 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -30,6 +30,11 @@ * server is dead or overloaded, the load balancer can bypass the cache * server and send requests to the original server directly. * + * The weight destination attribute can be used to control the + * distribution of connections to the destinations in servernode. The + * greater the weight, the more connections the destination + * will receive. + * */ #define KMSG_COMPONENT "IPVS" @@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + int d_count; b = tbl; p = &svc->destinations; + d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { if (list_empty(p)) { b->dest = NULL; @@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) atomic_inc(&dest->refcnt); b->dest = dest; - p = p->next; + IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", + i, IP_VS_DBG_ADDR(svc->af, &dest->addr), + atomic_read(&dest->weight)); + + /* Don't move to next dest until filling weight */ + if (++d_count >= atomic_read(&dest->weight)) { + p = p->next; + d_count = 0; + } + } b++; } diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index bffa6b03bb79..f4f8cda05986 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)acct[dir].packets, - (unsigned long long)acct[dir].bytes); + (unsigned long long)atomic64_read(&acct[dir].packets), + (unsigned long long)atomic64_read(&acct[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index deeef74e775f..e875f8902db3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) { @@ -1044,10 +1045,8 @@ acct: acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += skb->len; - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1063,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len - skb_network_offset(skb), + &acct[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 340c80d968d4..bebb1675e6ff 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static HLIST_HEAD(nf_ct_userspace_expect_list); - /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, u32 pid, int report) @@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); + NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); net->ct.expect_count--; hlist_del(&exp->lnode); - if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) - master_help->expecting[exp->class]--; + master_help->expecting[exp->class]--; nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); nf_ct_expect_put(exp); @@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp) } EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(exp); - const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); - if (master_help) { - hlist_add_head(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; - } else if (exp->flags & NF_CT_EXPECT_USERSPACE) - hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list); + hlist_add_head(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[exp->class]; - exp->timeout.expires = jiffies + p->timeout * HZ; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + exp->timeout.expires = jiffies + + helper->expect_policy[exp->class].timeout * HZ; } add_timer(&exp->timeout); NF_CT_STAT_INC(net, expect_create); + return 0; } /* Race with expectations being used means we could have none to find; OK. */ @@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret = 1; - /* Don't allow expectations created from kernel-space with no helper */ - if (!(expect->flags & NF_CT_EXPECT_USERSPACE) && - (!master_help || (master_help && !master_help->helper))) { + if (!master_help) { ret = -ESHUTDOWN; goto out; } @@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } } /* Will be over limit? */ - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[expect->class]; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + p = &helper->expect_policy[expect->class]; if (p->max_expected && master_help->expecting[expect->class] >= p->max_expected) { evict_oldest_expect(master, expect); @@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret <= 0) goto out; - ret = 0; - nf_ct_expect_insert(expect); + ret = nf_ct_expect_insert(expect); + if (ret < 0) + goto out; spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; @@ -461,21 +455,6 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); -void nf_ct_remove_userspace_expectations(void) -{ - struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; - - hlist_for_each_entry_safe(exp, n, next, - &nf_ct_userspace_expect_list, lnode) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } - } -} -EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations); - #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 93c4bdbfc1ae..c9e0de08aa87 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, int ret = 0; if (tmpl != NULL) { + /* we've got a userspace helper. */ + if (tmpl->status & IPS_USERSPACE_HELPER) { + help = nf_ct_helper_ext_add(ct, flags); + if (help == NULL) { + ret = -ENOMEM; + goto out; + } + rcu_assign_pointer(help->helper, NULL); + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + ret = 0; + goto out; + } help = nfct_help(tmpl); if (help != NULL) helper = help->helper; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ef21b221f036..85033344aed2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -203,25 +203,18 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, + enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; - const struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (!acct) - return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, - cpu_to_be64(acct[dir].packets)); - NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, - cpu_to_be64(acct[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes)); nla_nest_end(skb, nest_count); @@ -232,6 +225,27 @@ nla_put_failure: } static int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, + enum ip_conntrack_dir dir, int type) +{ + struct nf_conn_counter *acct; + u64 pkts, bytes; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct[dir].packets, 0); + bytes = atomic64_xchg(&acct[dir].bytes, 0); + } else { + pkts = atomic64_read(&acct[dir].packets); + bytes = atomic64_read(&acct[dir].bytes); + } + return dump_counters(skb, pkts, bytes, dir); +} + +static int ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_count; @@ -393,15 +407,15 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, struct nf_conn *ct) +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; - event |= NFNL_SUBSYS_CTNETLINK << 8; + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + if (ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { @@ -709,20 +725,13 @@ restart: } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct) < 0) { + NFNL_MSG_TYPE( + cb->nlh->nlmsg_type), + ct) < 0) { nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { - struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (acct) - memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); - } } if (cb->args[1]) { cb->args[1] = 0; @@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct); + NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) @@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) if (cda[CTA_NAT_DST]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_DST, + NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) return ret; } if (cda[CTA_NAT_SRC]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_SRC, + NF_NAT_MANIP_SRC, cda[CTA_NAT_SRC]); if (ret < 0) return ret; @@ -1847,7 +1856,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - if (cda[CTA_EXPECT_MASTER]) + if (cda[CTA_EXPECT_TUPLE]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + else if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else return -EINVAL; @@ -2023,6 +2034,10 @@ ctnetlink_create_expect(struct net *net, u16 zone, } help = nfct_help(ct); if (!help) { + err = -EOPNOTSUPP; + goto out; + } + if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; goto out; @@ -2247,7 +2262,6 @@ static void __exit ctnetlink_exit(void) { pr_info("ctnetlink: unregistering from nfnetlink.\n"); - nf_ct_remove_userspace_expectations(); unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c new file mode 100644 index 000000000000..362ab6ca3dc1 --- /dev/null +++ b/net/netfilter/nfnetlink_acct.c @@ -0,0 +1,352 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <asm/atomic.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_acct.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); + +static LIST_HEAD(nfnl_acct_list); + +struct nf_acct { + atomic64_t pkts; + atomic64_t bytes; + struct list_head head; + atomic_t refcnt; + char name[NFACCT_NAME_MAX]; + struct rcu_head rcu_head; +}; + +static int +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + struct nf_acct *nfacct, *matching = NULL; + char *acct_name; + + if (!tb[NFACCT_NAME]) + return -EINVAL; + + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(nfacct, &nfnl_acct_list, head) { + if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + matching = nfacct; + break; + } + + if (matching) { + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + /* reset counters if you request a replacement. */ + atomic64_set(&matching->pkts, 0); + atomic64_set(&matching->bytes, 0); + return 0; + } + return -EBUSY; + } + + nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (nfacct == NULL) + return -ENOMEM; + + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + + if (tb[NFACCT_BYTES]) { + atomic64_set(&nfacct->bytes, + be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + } + if (tb[NFACCT_PKTS]) { + atomic64_set(&nfacct->pkts, + be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + } + atomic_set(&nfacct->refcnt, 1); + list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + return 0; +} + +static int +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_acct *acct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + u64 pkts, bytes; + + event |= NFNL_SUBSYS_ACCT << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + NLA_PUT_STRING(skb, NFACCT_NAME, acct->name); + + if (type == NFNL_MSG_ACCT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct->pkts, 0); + bytes = atomic64_xchg(&acct->bytes, 0); + } else { + pkts = atomic64_read(&acct->pkts); + bytes = atomic64_read(&acct->bytes); + } + NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes)); + NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))); + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_acct *cur, *last; + + if (cb->args[2]) + return 0; + + last = (struct nf_acct *)cb->args[1]; + if (cb->args[1]) + cb->args[1] = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (last && cur != last) + continue; + + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + break; + } + } + if (!cb->args[1]) + cb->args[2] = 1; + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0; + struct nf_acct *cur; + char *acct_name; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, + NULL, 0); + } + + if (!tb[NFACCT_NAME]) + return -EINVAL; + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + struct sk_buff *skb2; + + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + break; + + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur); + if (ret <= 0) + kfree_skb(skb2); + + break; + } + return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int nfnl_acct_try_del(struct nf_acct *cur) +{ + int ret = 0; + + /* we want to avoid races with nfnl_acct_find_get. */ + if (atomic_dec_and_test(&cur->refcnt)) { + /* We are protected by nfnl mutex. */ + list_del_rcu(&cur->head); + kfree_rcu(cur, rcu_head); + } else { + /* still in use, restore reference counter. */ + atomic_inc(&cur->refcnt); + ret = -EBUSY; + } + return ret; +} + +static int +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *acct_name; + struct nf_acct *cur; + int ret = -ENOENT; + + if (!tb[NFACCT_NAME]) { + list_for_each_entry(cur, &nfnl_acct_list, head) + nfnl_acct_try_del(cur); + + return 0; + } + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + ret = nfnl_acct_try_del(cur); + if (ret < 0) + return ret; + + break; + } + return ret; +} + +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { + [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, + [NFACCT_BYTES] = { .type = NLA_U64 }, + [NFACCT_PKTS] = { .type = NLA_U64 }, +}; + +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { + [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_acct_subsys = { + .name = "acct", + .subsys_id = NFNL_SUBSYS_ACCT, + .cb_count = NFNL_MSG_ACCT_MAX, + .cb = nfnl_acct_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); + +struct nf_acct *nfnl_acct_find_get(const char *acct_name) +{ + struct nf_acct *cur, *acct = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + if (!try_module_get(THIS_MODULE)) + goto err; + + if (!atomic_inc_not_zero(&cur->refcnt)) { + module_put(THIS_MODULE); + goto err; + } + + acct = cur; + break; + } +err: + rcu_read_unlock(); + return acct; +} +EXPORT_SYMBOL_GPL(nfnl_acct_find_get); + +void nfnl_acct_put(struct nf_acct *acct) +{ + atomic_dec(&acct->refcnt); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(nfnl_acct_put); + +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + atomic64_inc(&nfacct->pkts); + atomic64_add(skb->len, &nfacct->bytes); +} +EXPORT_SYMBOL_GPL(nfnl_acct_update); + +static int __init nfnl_acct_init(void) +{ + int ret; + + pr_info("nfnl_acct: registering with nfnetlink.\n"); + ret = nfnetlink_subsys_register(&nfnl_acct_subsys); + if (ret < 0) { + pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_acct_exit(void) +{ + struct nf_acct *cur, *tmp; + + pr_info("nfnl_acct: unregistering from nfnetlink.\n"); + nfnetlink_subsys_unregister(&nfnl_acct_subsys); + + list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { + list_del_rcu(&cur->head); + /* We are sure that our objects have no clients at this point, + * it's safe to release them all without checking refcnt. */ + kfree_rcu(cur, rcu_head); + } +} + +module_init(nfnl_acct_init); +module_exit(nfnl_acct_exit); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0221d10de75a..8e87123f1373 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) int ret = 0; u8 proto; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; + if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER)) + return -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); @@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) GFP_KERNEL)) goto err3; - if (info->helper[0]) { + if (info->flags & XT_CT_USERSPACE_HELPER) { + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + } else if (info->helper[0]) { ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 9ddf1c3bfb39..e595e07a759b 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) case XT_CONNBYTES_PKTS: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].packets; - what += counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } break; case XT_CONNBYTES_BYTES: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].bytes; - what += counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; } break; case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - bytes = counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes + - counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets + - counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + + atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + + atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } if (pkts != 0) diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c new file mode 100644 index 000000000000..b3be0ef21f19 --- /dev/null +++ b/net/netfilter/xt_nfacct.c @@ -0,0 +1,76 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 (or any + * later at your option) as published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nfnetlink_acct.h> +#include <linux/netfilter/xt_nfacct.h> + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_nfacct"); +MODULE_ALIAS("ip6t_nfacct"); + +static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_nfacct_match_info *info = par->targinfo; + + nfnl_acct_update(skb, info->nfacct); + + return true; +} + +static int +nfacct_mt_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_nfacct_match_info *info = par->matchinfo; + struct nf_acct *nfacct; + + nfacct = nfnl_acct_find_get(info->name); + if (nfacct == NULL) { + pr_info("xt_nfacct: accounting object with name `%s' " + "does not exists\n", info->name); + return -ENOENT; + } + info->nfacct = nfacct; + return 0; +} + +static void +nfacct_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_nfacct_match_info *info = par->matchinfo; + + nfnl_acct_put(info->nfacct); +} + +static struct xt_match nfacct_mt_reg __read_mostly = { + .name = "nfacct", + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .me = THIS_MODULE, +}; + +static int __init nfacct_mt_init(void) +{ + return xt_register_match(&nfacct_mt_reg); +} + +static void __exit nfacct_mt_exit(void) +{ + xt_unregister_match(&nfacct_mt_reg); +} + +module_init(nfacct_mt_init); +module_exit(nfacct_mt_exit); |