summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2011-12-25 02:21:45 -0500
committerDavid S. Miller <davem@davemloft.net>2011-12-25 02:21:45 -0500
commitc5e1fd8ccae09f574d6f978c90c2b968ee29030c (patch)
treee4485dc086ce76c4ff2ff551246255f5de0a250b /net/netfilter
parent60b778ce519625102d3f72a2071ea72a05e990ce (diff)
parentceb98d03eac5704820f2ac1f370c9ff385e3a9f5 (diff)
downloadlwn-c5e1fd8ccae09f574d6f978c90c2b968ee29030c.tar.gz
lwn-c5e1fd8ccae09f574d6f978c90c2b968ee29030c.zip
Merge branch 'nf-next' of git://1984.lsi.us.es/net-next
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig18
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/ipvs/Kconfig15
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c18
-rw-r--r--net/netfilter/nf_conntrack_acct.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c15
-rw-r--r--net/netfilter/nf_conntrack_expect.c63
-rw-r--r--net/netfilter/nf_conntrack_helper.c12
-rw-r--r--net/netfilter/nf_conntrack_netlink.c82
-rw-r--r--net/netfilter/nfnetlink_acct.c352
-rw-r--r--net/netfilter/xt_CT.c8
-rw-r--r--net/netfilter/xt_connbytes.c32
-rw-r--r--net/netfilter/xt_nfacct.c76
13 files changed, 590 insertions, 107 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d5597b759ba3..bac93ba60778 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"
config NETFILTER_NETLINK
tristate
+config NETFILTER_NETLINK_ACCT
+tristate "Netfilter NFACCT over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for extended accounting via NFNETLINK.
+
config NETFILTER_NETLINK_QUEUE
tristate "Netfilter NFQUEUE over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@@ -879,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_NFACCT
+ tristate '"nfacct" match support'
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_NETLINK_ACCT
+ help
+ This option allows you to use the extended accounting through
+ nfnetlink_acct.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_OSF
tristate '"osf" Passive OS fingerprint match'
depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1a02853df863..b2eee4df8168 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
@@ -90,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 70bd1d0774c6..af4c0b8c5275 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -232,6 +232,21 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+comment 'IPVS SH scheduler'
+
+config IP_VS_SH_TAB_BITS
+ int "IPVS source hashing table size (the Nth power of 2)"
+ range 4 20
+ default 8
+ ---help---
+ The source hashing scheduler maps source IPs to destinations
+ stored in a hash table. This table is tiled by each destination
+ until all slots in the table are filled. When using weights to
+ allow destinations to receive more connections, the table is
+ tiled an amount proportional to the weights specified. The table
+ needs to be large enough to effectively fit all the destinations
+ multiplied by their respective weights.
+
comment 'IPVS application helper'
config IP_VS_FTP
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 33815f4fb451..069e8d4d5c01 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -30,6 +30,11 @@
* server is dead or overloaded, the load balancer can bypass the cache
* server and send requests to the original server directly.
*
+ * The weight destination attribute can be used to control the
+ * distribution of connections to the destinations in servernode. The
+ * greater the weight, the more connections the destination
+ * will receive.
+ *
*/
#define KMSG_COMPONENT "IPVS"
@@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
struct ip_vs_sh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
+ int d_count;
b = tbl;
p = &svc->destinations;
+ d_count = 0;
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
if (list_empty(p)) {
b->dest = NULL;
@@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
atomic_inc(&dest->refcnt);
b->dest = dest;
- p = p->next;
+ IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
+ i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ atomic_read(&dest->weight));
+
+ /* Don't move to next dest until filling weight */
+ if (++d_count >= atomic_read(&dest->weight)) {
+ p = p->next;
+ d_count = 0;
+ }
+
}
b++;
}
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index bffa6b03bb79..f4f8cda05986 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
return 0;
return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)acct[dir].packets,
- (unsigned long long)acct[dir].bytes);
+ (unsigned long long)atomic64_read(&acct[dir].packets),
+ (unsigned long long)atomic64_read(&acct[dir].bytes));
};
EXPORT_SYMBOL_GPL(seq_print_acct);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index deeef74e775f..e875f8902db3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
unsigned int nf_conntrack_hash_rnd __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
{
@@ -1044,10 +1045,8 @@ acct:
acct = nf_conn_acct_find(ct);
if (acct) {
- spin_lock_bh(&ct->lock);
- acct[CTINFO2DIR(ctinfo)].packets++;
- acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
- spin_unlock_bh(&ct->lock);
+ atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
}
}
}
@@ -1063,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
acct = nf_conn_acct_find(ct);
if (acct) {
- spin_lock_bh(&ct->lock);
- acct[CTINFO2DIR(ctinfo)].packets++;
- acct[CTINFO2DIR(ctinfo)].bytes +=
- skb->len - skb_network_offset(skb);
- spin_unlock_bh(&ct->lock);
+ atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(skb->len - skb_network_offset(skb),
+ &acct[CTINFO2DIR(ctinfo)].bytes);
}
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 340c80d968d4..bebb1675e6ff 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static HLIST_HEAD(nf_ct_userspace_expect_list);
-
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 pid, int report)
@@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
+ NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
hlist_del(&exp->lnode);
- if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
- master_help->expecting[exp->class]--;
+ master_help->expecting[exp->class]--;
nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
nf_ct_expect_put(exp);
@@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
-static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
- const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
/* two references : one for hash insert, one for the timer */
atomic_add(2, &exp->use);
- if (master_help) {
- hlist_add_head(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
- } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
- hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
+ hlist_add_head(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
- if (master_help) {
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[exp->class];
- exp->timeout.expires = jiffies + p->timeout * HZ;
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock));
+ if (helper) {
+ exp->timeout.expires = jiffies +
+ helper->expect_policy[exp->class].timeout * HZ;
}
add_timer(&exp->timeout);
NF_CT_STAT_INC(net, expect_create);
+ return 0;
}
/* Race with expectations being used means we could have none to find; OK. */
@@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
int ret = 1;
- /* Don't allow expectations created from kernel-space with no helper */
- if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
- (!master_help || (master_help && !master_help->helper))) {
+ if (!master_help) {
ret = -ESHUTDOWN;
goto out;
}
@@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
}
/* Will be over limit? */
- if (master_help) {
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[expect->class];
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock));
+ if (helper) {
+ p = &helper->expect_policy[expect->class];
if (p->max_expected &&
master_help->expecting[expect->class] >= p->max_expected) {
evict_oldest_expect(master, expect);
@@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
if (ret <= 0)
goto out;
- ret = 0;
- nf_ct_expect_insert(expect);
+ ret = nf_ct_expect_insert(expect);
+ if (ret < 0)
+ goto out;
spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
return ret;
@@ -461,21 +455,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
-void nf_ct_remove_userspace_expectations(void)
-{
- struct nf_conntrack_expect *exp;
- struct hlist_node *n, *next;
-
- hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_userspace_expect_list, lnode) {
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
-
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 93c4bdbfc1ae..c9e0de08aa87 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
int ret = 0;
if (tmpl != NULL) {
+ /* we've got a userspace helper. */
+ if (tmpl->status & IPS_USERSPACE_HELPER) {
+ help = nf_ct_helper_ext_add(ct, flags);
+ if (help == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ rcu_assign_pointer(help->helper, NULL);
+ __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+ ret = 0;
+ goto out;
+ }
help = nfct_help(tmpl);
if (help != NULL)
helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ef21b221f036..85033344aed2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -203,25 +203,18 @@ nla_put_failure:
}
static int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
- enum ip_conntrack_dir dir)
+dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes,
+ enum ip_conntrack_dir dir)
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nlattr *nest_count;
- const struct nf_conn_counter *acct;
-
- acct = nf_conn_acct_find(ct);
- if (!acct)
- return 0;
nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
if (!nest_count)
goto nla_put_failure;
- NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS,
- cpu_to_be64(acct[dir].packets));
- NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES,
- cpu_to_be64(acct[dir].bytes));
+ NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts));
+ NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes));
nla_nest_end(skb, nest_count);
@@ -232,6 +225,27 @@ nla_put_failure:
}
static int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, int type)
+{
+ struct nf_conn_counter *acct;
+ u64 pkts, bytes;
+
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
+ return 0;
+
+ if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&acct[dir].packets, 0);
+ bytes = atomic64_xchg(&acct[dir].bytes, 0);
+ } else {
+ pkts = atomic64_read(&acct[dir].packets);
+ bytes = atomic64_read(&acct[dir].bytes);
+ }
+ return dump_counters(skb, pkts, bytes, dir);
+}
+
+static int
ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
{
struct nlattr *nest_count;
@@ -393,15 +407,15 @@ nla_put_failure:
}
static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, struct nf_conn *ct)
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = pid ? NLM_F_MULTI : 0, event;
- event |= NFNL_SUBSYS_CTNETLINK << 8;
+ event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
@@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (events & (1 << IPCT_DESTROY)) {
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ if (ctnetlink_dump_counters(skb, ct,
+ IP_CT_DIR_ORIGINAL, type) < 0 ||
+ ctnetlink_dump_counters(skb, ct,
+ IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
@@ -709,20 +725,13 @@ restart:
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, ct) < 0) {
+ NFNL_MSG_TYPE(
+ cb->nlh->nlmsg_type),
+ ct) < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
goto out;
}
-
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
- struct nf_conn_counter *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct)
- memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
- }
}
if (cb->args[1]) {
cb->args[1] = 0;
@@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, ct);
+ NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
@@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
if (cda[CTA_NAT_DST]) {
ret = ctnetlink_parse_nat_setup(ct,
- IP_NAT_MANIP_DST,
+ NF_NAT_MANIP_DST,
cda[CTA_NAT_DST]);
if (ret < 0)
return ret;
}
if (cda[CTA_NAT_SRC]) {
ret = ctnetlink_parse_nat_setup(ct,
- IP_NAT_MANIP_SRC,
+ NF_NAT_MANIP_SRC,
cda[CTA_NAT_SRC]);
if (ret < 0)
return ret;
@@ -1847,7 +1856,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- if (cda[CTA_EXPECT_MASTER])
+ if (cda[CTA_EXPECT_TUPLE])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ else if (cda[CTA_EXPECT_MASTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
else
return -EINVAL;
@@ -2023,6 +2034,10 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
help = nfct_help(ct);
if (!help) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
err = -EINVAL;
goto out;
@@ -2247,7 +2262,6 @@ static void __exit ctnetlink_exit(void)
{
pr_info("ctnetlink: unregistering from nfnetlink.\n");
- nf_ct_remove_userspace_expectations();
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
new file mode 100644
index 000000000000..362ab6ca3dc1
--- /dev/null
+++ b/net/netfilter/nfnetlink_acct.c
@@ -0,0 +1,352 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
+
+static LIST_HEAD(nfnl_acct_list);
+
+struct nf_acct {
+ atomic64_t pkts;
+ atomic64_t bytes;
+ struct list_head head;
+ atomic_t refcnt;
+ char name[NFACCT_NAME_MAX];
+ struct rcu_head rcu_head;
+};
+
+static int
+nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ struct nf_acct *nfacct, *matching = NULL;
+ char *acct_name;
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+ if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ matching = nfacct;
+ break;
+ }
+
+ if (matching) {
+ if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ /* reset counters if you request a replacement. */
+ atomic64_set(&matching->pkts, 0);
+ atomic64_set(&matching->bytes, 0);
+ return 0;
+ }
+ return -EBUSY;
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+ if (nfacct == NULL)
+ return -ENOMEM;
+
+ strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+
+ if (tb[NFACCT_BYTES]) {
+ atomic64_set(&nfacct->bytes,
+ be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+ }
+ if (tb[NFACCT_PKTS]) {
+ atomic64_set(&nfacct->pkts,
+ be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+ }
+ atomic_set(&nfacct->refcnt, 1);
+ list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+ return 0;
+}
+
+static int
+nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ int event, struct nf_acct *acct)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
+ u64 pkts, bytes;
+
+ event |= NFNL_SUBSYS_ACCT << 8;
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
+
+ if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&acct->pkts, 0);
+ bytes = atomic64_xchg(&acct->bytes, 0);
+ } else {
+ pkts = atomic64_read(&acct->pkts);
+ bytes = atomic64_read(&acct->bytes);
+ }
+ NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts));
+ NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes));
+ NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)));
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_acct *cur, *last;
+
+ if (cb->args[2])
+ return 0;
+
+ last = (struct nf_acct *)cb->args[1];
+ if (cb->args[1])
+ cb->args[1] = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ if (last && cur != last)
+ continue;
+
+ if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur) < 0) {
+ cb->args[1] = (unsigned long)cur;
+ break;
+ }
+ }
+ if (!cb->args[1])
+ cb->args[2] = 1;
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int
+nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ int ret = 0;
+ struct nf_acct *cur;
+ char *acct_name;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
+ NULL, 0);
+ }
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_list, head) {
+ struct sk_buff *skb2;
+
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+ continue;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ break;
+
+ ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur);
+ if (ret <= 0)
+ kfree_skb(skb2);
+
+ break;
+ }
+ return ret;
+}
+
+/* try to delete object, fail if it is still in use. */
+static int nfnl_acct_try_del(struct nf_acct *cur)
+{
+ int ret = 0;
+
+ /* we want to avoid races with nfnl_acct_find_get. */
+ if (atomic_dec_and_test(&cur->refcnt)) {
+ /* We are protected by nfnl mutex. */
+ list_del_rcu(&cur->head);
+ kfree_rcu(cur, rcu_head);
+ } else {
+ /* still in use, restore reference counter. */
+ atomic_inc(&cur->refcnt);
+ ret = -EBUSY;
+ }
+ return ret;
+}
+
+static int
+nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ char *acct_name;
+ struct nf_acct *cur;
+ int ret = -ENOENT;
+
+ if (!tb[NFACCT_NAME]) {
+ list_for_each_entry(cur, &nfnl_acct_list, head)
+ nfnl_acct_try_del(cur);
+
+ return 0;
+ }
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ ret = nfnl_acct_try_del(cur);
+ if (ret < 0)
+ return ret;
+
+ break;
+ }
+ return ret;
+}
+
+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
+ [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
+ [NFACCT_BYTES] = { .type = NLA_U64 },
+ [NFACCT_PKTS] = { .type = NLA_U64 },
+};
+
+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
+ [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+};
+
+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
+ .name = "acct",
+ .subsys_id = NFNL_SUBSYS_ACCT,
+ .cb_count = NFNL_MSG_ACCT_MAX,
+ .cb = nfnl_acct_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
+
+struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+{
+ struct nf_acct *cur, *acct = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+ continue;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err;
+
+ if (!atomic_inc_not_zero(&cur->refcnt)) {
+ module_put(THIS_MODULE);
+ goto err;
+ }
+
+ acct = cur;
+ break;
+ }
+err:
+ rcu_read_unlock();
+ return acct;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
+
+void nfnl_acct_put(struct nf_acct *acct)
+{
+ atomic_dec(&acct->refcnt);
+ module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_put);
+
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ atomic64_inc(&nfacct->pkts);
+ atomic64_add(skb->len, &nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_update);
+
+static int __init nfnl_acct_init(void)
+{
+ int ret;
+
+ pr_info("nfnl_acct: registering with nfnetlink.\n");
+ ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
+ goto err_out;
+ }
+ return 0;
+err_out:
+ return ret;
+}
+
+static void __exit nfnl_acct_exit(void)
+{
+ struct nf_acct *cur, *tmp;
+
+ pr_info("nfnl_acct: unregistering from nfnetlink.\n");
+ nfnetlink_subsys_unregister(&nfnl_acct_subsys);
+
+ list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
+ list_del_rcu(&cur->head);
+ /* We are sure that our objects have no clients at this point,
+ * it's safe to release them all without checking refcnt. */
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+module_init(nfnl_acct_init);
+module_exit(nfnl_acct_exit);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0221d10de75a..8e87123f1373 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
int ret = 0;
u8 proto;
- if (info->flags & ~XT_CT_NOTRACK)
- return -EINVAL;
+ if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
+ return -EOPNOTSUPP;
if (info->flags & XT_CT_NOTRACK) {
ct = nf_ct_untracked_get();
@@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
GFP_KERNEL))
goto err3;
- if (info->helper[0]) {
+ if (info->flags & XT_CT_USERSPACE_HELPER) {
+ __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+ } else if (info->helper[0]) {
ret = -ENOENT;
proto = xt_ct_find_proto(par);
if (!proto) {
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 9ddf1c3bfb39..e595e07a759b 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
case XT_CONNBYTES_PKTS:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
- what += counters[IP_CT_DIR_REPLY].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
break;
case XT_CONNBYTES_BYTES:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
break;
case XT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
case XT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
- what += counters[IP_CT_DIR_REPLY].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
}
break;
case XT_CONNBYTES_AVGPKT:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- bytes = counters[IP_CT_DIR_ORIGINAL].bytes;
- pkts = counters[IP_CT_DIR_ORIGINAL].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
- bytes = counters[IP_CT_DIR_REPLY].bytes;
- pkts = counters[IP_CT_DIR_REPLY].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
- bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
- counters[IP_CT_DIR_REPLY].bytes;
- pkts = counters[IP_CT_DIR_ORIGINAL].packets +
- counters[IP_CT_DIR_REPLY].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
if (pkts != 0)
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
new file mode 100644
index 000000000000..b3be0ef21f19
--- /dev/null
+++ b/net/netfilter/xt_nfacct.c
@@ -0,0 +1,76 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 (or any
+ * later at your option) as published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/xt_nfacct.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_nfacct");
+MODULE_ALIAS("ip6t_nfacct");
+
+static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_nfacct_match_info *info = par->targinfo;
+
+ nfnl_acct_update(skb, info->nfacct);
+
+ return true;
+}
+
+static int
+nfacct_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_nfacct_match_info *info = par->matchinfo;
+ struct nf_acct *nfacct;
+
+ nfacct = nfnl_acct_find_get(info->name);
+ if (nfacct == NULL) {
+ pr_info("xt_nfacct: accounting object with name `%s' "
+ "does not exists\n", info->name);
+ return -ENOENT;
+ }
+ info->nfacct = nfacct;
+ return 0;
+}
+
+static void
+nfacct_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_nfacct_match_info *info = par->matchinfo;
+
+ nfnl_acct_put(info->nfacct);
+}
+
+static struct xt_match nfacct_mt_reg __read_mostly = {
+ .name = "nfacct",
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfacct_mt_checkentry,
+ .match = nfacct_mt,
+ .destroy = nfacct_mt_destroy,
+ .matchsize = sizeof(struct xt_nfacct_match_info),
+ .me = THIS_MODULE,
+};
+
+static int __init nfacct_mt_init(void)
+{
+ return xt_register_match(&nfacct_mt_reg);
+}
+
+static void __exit nfacct_mt_exit(void)
+{
+ xt_unregister_match(&nfacct_mt_reg);
+}
+
+module_init(nfacct_mt_init);
+module_exit(nfacct_mt_exit);