From 4a15044a2b06748c99a8c8c3c6b3ee0a01f8004d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 25 May 2026 06:54:39 +0300 Subject: ipvs: add conn_max sysctl to limit connections Currently, we are using atomic_t to track the number of connections. On 64-bit setups with large memory there is a risk of this counter overflowing. Also, setups with many containers may need to tune the limit for connections. Add sysctl control to limit the number of connections to 1,073,741,824 (64-bit) and 16,777,216 (32-bit). Depending on the admin's privilege, the value is used to change a soft or hard limit allowing unprivileged admins to change the soft limit within the range determined by privileged admins. Link: https://sashiko.dev/#/patchset/20260523172715.94795-1-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260430074420.26697-7-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260522105546.13732-1-ja%40ssi.bg Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e517eaaa177b..49297fec448a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -44,6 +44,14 @@ #define IP_VS_CONN_TAB_MAX_BITS 20 #endif +/* conn_max limits */ +#if BITS_PER_LONG > 32 +/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */ +#define IP_VS_CONN_MAX (1 << 30) +#else +#define IP_VS_CONN_MAX (1 << 24) +#endif + /* svc_table limits */ #define IP_VS_SVC_TAB_MIN_BITS 4 #define IP_VS_SVC_TAB_MAX_BITS 20 @@ -1220,6 +1228,10 @@ struct netns_ipvs { /* sysctl variables */ int sysctl_amemthresh; int sysctl_am_droprate; +#ifdef CONFIG_SYSCTL + int sysctl_conn_max;/* soft limit for conns */ + int conn_max_limit; /* hard limit for conn_max */ +#endif int sysctl_drop_entry; int sysctl_drop_packet; int sysctl_secure_tcp; @@ -1317,6 +1329,11 @@ struct netns_ipvs { #ifdef CONFIG_SYSCTL +static inline int 
sysctl_conn_max(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_conn_max); +} + static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) { return ipvs->sysctl_sync_threshold[0]; @@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #else +static inline int sysctl_conn_max(struct netns_ipvs *ipvs) +{ + return IP_VS_CONN_MAX; +} + static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) { return DEFAULT_SYNC_THRESHOLD; -- cgit v1.2.3 From 7d6a9cdb8d3a51d9cfe546a09a518ab3d2671549 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jun 2026 08:21:08 +0200 Subject: netfilter: cttimeout: detach dataplane timeout policy and repurpose refcount Add a refcount for struct nf_ct_timeout which is used by ct extension to set the custom ct timeout policy; this tells us that the ct timeout is being used by a conntrack entry. When the last conntrack entry drops the refcount on the ct timeout, the ct timeout is released. Remove the refcount for control plane which controls if the ruleset refers to the timeout policy. After this update, it is possible to remove the ct timeout policy from nfnetlink_cttimeout immediately. This is for simplicity not to handle two refcounts on a single object. Remove nf_queue_nf_hook_drop(): a packet sitting in nfqueue will just hold a reference to the nf_ct_timeout object until the packet is reinjected; since this is part of the ct extension, this will be released by the time the conntrack is freed. nf_ct_untimeout() is still called to clean up on a best-effort basis: the ct timeout on existing entries gets removed when the ct timeout goes away, but as long as the iptables ruleset still refers to the ct timeout through a template, new conntracks may keep attaching it and extend its lifetime until the rule is removed. 
nf_ct_untimeout() is no longer called from the module removal path: it is unlikely to find timeouts given that the module refcount is bumped, and the new refcount already tracks the ct timeout policy use so it is released when unused. Fixes: 50978462300f ("netfilter: add cttimeout infrastructure for fine timeout tuning") Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_timeout.h | 27 ++++++- net/netfilter/nf_conntrack_core.c | 6 +- net/netfilter/nf_conntrack_timeout.c | 27 ++++++- net/netfilter/nfnetlink_cttimeout.c | 112 ++++++++++++--------------- net/netfilter/nft_ct.c | 7 +- net/netfilter/xt_CT.c | 2 +- 6 files changed, 107 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 3a66d4abb6d6..d60aa86be019 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -12,6 +12,7 @@ #define CTNL_TIMEOUT_NAME_MAX 32 struct nf_ct_timeout { + refcount_t refcnt; __u16 l3num; const struct nf_conntrack_l4proto *l4proto; struct rcu_head rcu; @@ -22,6 +23,22 @@ struct nf_conn_timeout { struct nf_ct_timeout __rcu *timeout; }; +static inline void nf_ct_timeout_put(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct nf_conn_timeout *timeout_ext; + struct nf_ct_timeout *timeout; + + timeout_ext = nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); + if (!timeout_ext) + return; + + timeout = rcu_dereference(timeout_ext->timeout); + if (timeout && refcount_dec_and_test(&timeout->refcnt)) + kfree_rcu(timeout, rcu); +#endif +} + static inline unsigned int * nf_ct_timeout_data(const struct nf_conn_timeout *t) { @@ -56,8 +73,14 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; + if (!timeout) + return NULL; + timeout_ext = nf_ct_ext_add(ct, 
NF_CT_EXT_TIMEOUT, gfp); - if (timeout_ext == NULL) + if (!timeout_ext || timeout_ext->timeout) + return NULL; + + if (!refcount_inc_not_zero(&timeout->refcnt)) return NULL; rcu_assign_pointer(timeout_ext->timeout, timeout); @@ -75,7 +98,7 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) struct nf_conn_timeout *timeout_ext; timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) + if (timeout_ext && rcu_access_pointer(timeout_ext->timeout)) timeouts = nf_ct_timeout_data(timeout_ext); #endif return timeouts; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c072a14a306a..a45b73239369 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1737,16 +1737,18 @@ void nf_conntrack_free(struct nf_conn *ct) */ WARN_ON(refcount_read(&ct->ct_general.use) != 0); + rcu_read_lock(); if (ct->status & IPS_SRC_NAT_DONE) { const struct nf_nat_hook *nat_hook; - rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook) nat_hook->remove_nat_bysrc(ct); - rcu_read_unlock(); } + nf_ct_timeout_put(ct); + rcu_read_unlock(); + kfree(ct->ext); kmem_cache_free(nf_conntrack_cachep, ct); cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 0cc584d3dbb1..c81becde2afa 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,17 +25,32 @@ const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_hook); +/* nf_ct_iterate_cleanup() holds the conntrack lock. 
*/ static int untimeout(struct nf_conn *ct, void *timeout) { struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) { - const struct nf_ct_timeout *t; + struct nf_ct_timeout *t; - t = rcu_access_pointer(timeout_ext->timeout); + rcu_read_lock(); + t = rcu_dereference(timeout_ext->timeout); + if (!t) { + rcu_read_unlock(); + return 0; + } - if (!timeout || t == timeout) + if (!timeout || t == timeout) { RCU_INIT_POINTER(timeout_ext->timeout, NULL); + + /* No race with nf_conntrack_free() which is called + * only after the conntrack has been removed from + * the hashes. + */ + if (refcount_dec_and_test(&t->refcnt)) + kfree_rcu(t, rcu); + } + rcu_read_unlock(); } /* We are not intended to delete this conntrack. */ @@ -70,6 +85,8 @@ int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, const char *errmsg = NULL; int ret = 0; + WARN_ON_ONCE(!nf_ct_is_template(ct)); + rcu_read_lock(); h = rcu_dereference(nf_ct_timeout_hook); if (!h) { @@ -127,6 +144,8 @@ void nf_ct_destroy_timeout(struct nf_conn *ct) struct nf_conn_timeout *timeout_ext; const struct nf_ct_timeout_hooks *h; + WARN_ON_ONCE(!nf_ct_is_template(ct)); + rcu_read_lock(); h = rcu_dereference(nf_ct_timeout_hook); @@ -139,6 +158,8 @@ void nf_ct_destroy_timeout(struct nf_conn *ct) if (t) h->timeout_put(t); RCU_INIT_POINTER(timeout_ext->timeout, NULL); + if (t && refcount_dec_and_test(&t->refcnt)) + kfree_rcu(t, rcu); } } rcu_read_unlock(); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index dca6826af7de..170d3db860c5 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -37,11 +37,8 @@ struct ctnl_timeout { struct list_head head; struct list_head free_head; struct rcu_head rcu_head; - refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; - - /* must be at the end */ - struct nf_ct_timeout timeout; + struct nf_ct_timeout *timeout; }; struct nfct_timeout_pernet { @@ -132,12 +129,12 @@ static int 
cttimeout_new_timeout(struct sk_buff *skb, /* You cannot replace one timeout policy by another of * different kind, sorry. */ - if (matching->timeout.l3num != l3num || - matching->timeout.l4proto->l4proto != l4num) + if (matching->timeout->l3num != l3num || + matching->timeout->l4proto->l4proto != l4num) return -EINVAL; - return ctnl_timeout_parse_policy(&matching->timeout.data, - matching->timeout.l4proto, + return ctnl_timeout_parse_policy(&matching->timeout->data, + matching->timeout->l4proto, info->net, cda[CTA_TIMEOUT_DATA]); } @@ -153,26 +150,35 @@ static int cttimeout_new_timeout(struct sk_buff *skb, goto err_proto_put; } - timeout = kzalloc(sizeof(struct ctnl_timeout) + - l4proto->ctnl_timeout.obj_size, GFP_KERNEL); + timeout = kzalloc(sizeof(*timeout), GFP_KERNEL); if (timeout == NULL) { ret = -ENOMEM; goto err_proto_put; } - ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, + timeout->timeout = kzalloc(sizeof(*timeout->timeout) + + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); + if (!timeout->timeout) { + ret = -ENOMEM; + goto err; + } + + ret = ctnl_timeout_parse_policy(&timeout->timeout->data, l4proto, info->net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) - goto err; + goto err_free_timeout_policy; strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); - timeout->timeout.l3num = l3num; - timeout->timeout.l4proto = l4proto; - refcount_set(&timeout->refcnt, 1); + timeout->timeout->l3num = l3num; + timeout->timeout->l4proto = l4proto; + refcount_set(&timeout->timeout->refcnt, 1); __module_get(THIS_MODULE); list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; + +err_free_timeout_policy: + kfree(timeout->timeout); err: kfree(timeout); err_proto_put: @@ -185,7 +191,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, { struct nlmsghdr *nlh; unsigned int flags = portid ? 
NLM_F_MULTI : 0; - const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; + const struct nf_conntrack_l4proto *l4proto = timeout->timeout->l4proto; struct nlattr *nest_parms; int ret; @@ -197,17 +203,17 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, - htons(timeout->timeout.l3num)) || + htons(timeout->timeout->l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, - htonl(refcount_read(&timeout->refcnt)))) + htonl(refcount_read(&timeout->timeout->refcnt)))) goto nla_put_failure; nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; - ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data); + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout->data); if (ret < 0) goto nla_put_failure; @@ -307,23 +313,17 @@ static int cttimeout_get_timeout(struct sk_buff *skb, return ret; } -/* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) +static void ctnl_timeout_del(struct net *net, struct ctnl_timeout *timeout) { - int ret = 0; + /* We are protected by nfnl mutex. */ + list_del_rcu(&timeout->head); + nf_ct_untimeout(net, timeout->timeout); - /* We want to avoid races with ctnl_timeout_put. So only when the - * current refcnt is 1, we decrease it to 0. - */ - if (refcount_dec_if_one(&timeout->refcnt)) { - /* We are protected by nfnl mutex. 
*/ - list_del_rcu(&timeout->head); - nf_ct_untimeout(net, &timeout->timeout); - kfree_rcu(timeout, rcu_head); - } else { - ret = -EBUSY; - } - return ret; + if (refcount_dec_and_test(&timeout->timeout->refcnt)) + kfree_rcu(timeout->timeout, rcu); + + kfree_rcu(timeout, rcu_head); + module_put(THIS_MODULE); } static int cttimeout_del_timeout(struct sk_buff *skb, @@ -338,7 +338,7 @@ static int cttimeout_del_timeout(struct sk_buff *skb, if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) - ctnl_timeout_try_del(info->net, cur); + ctnl_timeout_del(info->net, cur); return 0; } @@ -348,10 +348,8 @@ static int cttimeout_del_timeout(struct sk_buff *skb, if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(info->net, cur); - if (ret < 0) - return ret; - + ctnl_timeout_del(info->net, cur); + ret = 0; break; } return ret; @@ -511,24 +509,22 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - if (!refcount_inc_not_zero(&timeout->refcnt)) + if (!refcount_inc_not_zero(&timeout->timeout->refcnt)) goto err; matching = timeout; + __module_get(THIS_MODULE); break; } err: - return matching ? &matching->timeout : NULL; + return matching ? 
matching->timeout : NULL; } -static void ctnl_timeout_put(struct nf_ct_timeout *t) +static void ctnl_timeout_put(struct nf_ct_timeout *timeout) { - struct ctnl_timeout *timeout = - container_of(t, struct ctnl_timeout, timeout); + if (refcount_dec_and_test(&timeout->refcnt)) + kfree_rcu(timeout, rcu); - if (refcount_dec_and_test(&timeout->refcnt)) { - kfree_rcu(timeout, rcu_head); - module_put(THIS_MODULE); - } + module_put(THIS_MODULE); } static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { @@ -609,8 +605,11 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) { list_del(&cur->free_head); - if (refcount_dec_and_test(&cur->refcnt)) - kfree_rcu(cur, rcu_head); + if (refcount_dec_and_test(&cur->timeout->refcnt)) + kfree_rcu(cur->timeout, rcu); + + kfree_rcu(cur, rcu_head); + module_put(THIS_MODULE); } } @@ -649,24 +648,13 @@ err_out: return ret; } -static int untimeout(struct nf_conn *ct, void *timeout) -{ - struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); - - if (timeout_ext) - RCU_INIT_POINTER(timeout_ext->timeout, NULL); - - return 0; -} - static void __exit cttimeout_exit(void) { nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); RCU_INIT_POINTER(nf_ct_timeout_hook, NULL); - - nf_ct_iterate_destroy(untimeout, NULL); + synchronize_net(); } module_init(cttimeout_init); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 357513c6dcea..801c01c6af95 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -897,8 +897,6 @@ static void nft_ct_timeout_obj_eval(struct nft_object *obj, } } - rcu_assign_pointer(timeout->timeout, priv->timeout); - /* adjust the timeout as per 'new' state. ct is unconfirmed, * so the current timestamp must not be added. 
*/ @@ -949,6 +947,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, timeout->l3num = l3num; timeout->l4proto = l4proto; + refcount_set(&timeout->refcnt, 1); ret = nf_ct_netns_get(ctx->net, ctx->family); if (ret < 0) @@ -969,10 +968,10 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nft_ct_timeout_obj *priv = nft_obj_data(obj); struct nf_ct_timeout *timeout = priv->timeout; - nf_queue_nf_hook_drop(ctx->net); nf_ct_untimeout(ctx->net, timeout); nf_ct_netns_put(ctx->net, ctx->family); - kfree_rcu(priv->timeout, rcu); + if (refcount_dec_and_test(&timeout->refcnt)) + kfree_rcu(priv->timeout, rcu); } static int nft_ct_timeout_obj_dump(struct sk_buff *skb, diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index d2aeacf94230..b94f004d5f5c 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -284,7 +284,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn_help *help; if (ct) { - if (info->helper[0] || info->timeout[0]) + if (info->helper[0]) nf_queue_nf_hook_drop(par->net); help = nfct_help(ct); -- cgit v1.2.3 From 6031487d4e273d7e7c8c7deea5061b7a9aaa9db3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jun 2026 08:21:09 +0200 Subject: netfilter: nf_conntrack_helper: dynamically allocate struct nf_conntrack_helper Adapt all existing helpers to use a modified version of nf_ct_helper_init(), to dynamically allocate struct nf_conntrack_helper. Allocate expect_policy[] built-in into the helper to ensure this area is reachable after helper removal since a follow up patch adds refcount to track use of the nf_conntrack_helper structure from packet path so it remains around until last reference from ct helper extension is dropped. Export __nf_conntrack_helper_register() which allows to register nfnetlink_cthelper dynamically allocated helper. Adapt nfnetlink_cthelper to use the built-in expect_policy[]. This is a preparation patch to add packet path refcounting to helpers. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 16 +++-- net/ipv4/netfilter/nf_nat_snmp_basic_main.c | 27 ++++--- net/netfilter/nf_conntrack_amanda.c | 39 ++++------ net/netfilter/nf_conntrack_ftp.c | 5 +- net/netfilter/nf_conntrack_h323_main.c | 107 +++++++++++----------------- net/netfilter/nf_conntrack_helper.c | 75 +++++++++++++++---- net/netfilter/nf_conntrack_irc.c | 5 +- net/netfilter/nf_conntrack_netbios_ns.c | 20 +++--- net/netfilter/nf_conntrack_pptp.c | 22 +++--- net/netfilter/nf_conntrack_sane.c | 5 +- net/netfilter/nf_conntrack_sip.c | 5 +- net/netfilter/nf_conntrack_snmp.c | 21 +++--- net/netfilter/nf_conntrack_tftp.c | 5 +- net/netfilter/nfnetlink_cthelper.c | 47 ++++++------ 14 files changed, 210 insertions(+), 189 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index de2f956abf34..1956bc12bf56 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -29,13 +29,16 @@ enum nf_ct_helper_flags { #define NF_CT_HELPER_NAME_LEN 16 +/* Must be kept in sync with the classes defined by helpers */ +#define NF_CT_MAX_EXPECT_CLASSES 4 + struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ refcount_t refcnt; struct module *me; /* pointer to self */ - const struct nf_conntrack_expect_policy *expect_policy; + struct nf_conntrack_expect_policy expect_policy[NF_CT_MAX_EXPECT_CLASSES]; /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -63,9 +66,6 @@ struct nf_conntrack_helper { char nat_mod_name[NF_CT_HELPER_NAME_LEN]; }; -/* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 4 - /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. 
if any */ @@ -103,11 +103,13 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, struct nf_conn *ct), struct module *module); -int nf_conntrack_helper_register(struct nf_conntrack_helper *); +int nf_conntrack_helper_register(struct nf_conntrack_helper *, struct nf_conntrack_helper **); +int __nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); -void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int, + struct nf_conntrack_helper **); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper **, unsigned int); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index 717b726504fe..0ede138dfd29 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -202,29 +202,34 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp_trap", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; +static struct nf_conntrack_helper snmp_trap_helper __read_mostly; +static struct nf_conntrack_helper *snmp_trap_helper_ptr __read_mostly; static int __init nf_nat_snmp_basic_init(void) { + int err; + BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - return nf_conntrack_helper_register(&snmp_trap_helper); + nf_ct_helper_init(&snmp_trap_helper, AF_INET, IPPROTO_UDP, + "snmp_trap", SNMP_TRAP_PORT, SNMP_TRAP_PORT, SNMP_TRAP_PORT, + &snmp_exp_policy, 0, help, NULL, 
THIS_MODULE); + + err = nf_conntrack_helper_register(&snmp_trap_helper, &snmp_trap_helper_ptr); + if (err < 0) { + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + return err; + } + + return 0; } static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); synchronize_rcu(); - nf_conntrack_helper_unregister(&snmp_trap_helper); + nf_conntrack_helper_unregister(snmp_trap_helper_ptr); } module_init(nf_nat_snmp_basic_init); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index d2c09e8dd872..ddafbdfc96dc 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -169,35 +169,15 @@ static const struct nf_conntrack_expect_policy amanda_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { - { - .name = HELPER_NAME, - .me = THIS_MODULE, - .help = amanda_help, - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(10080), - .tuple.dst.protonum = IPPROTO_UDP, - .expect_policy = &amanda_exp_policy, - .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), - }, - { - .name = "amanda", - .me = THIS_MODULE, - .help = amanda_help, - .tuple.src.l3num = AF_INET6, - .tuple.src.u.udp.port = cpu_to_be16(10080), - .tuple.dst.protonum = IPPROTO_UDP, - .expect_policy = &amanda_exp_policy, - .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), - }, -}; +static struct nf_conntrack_helper amanda_helper[2] __read_mostly; +static struct nf_conntrack_helper *amanda_helper_ptr[2] __read_mostly; static void __exit nf_conntrack_amanda_fini(void) { int i; - nf_conntrack_helpers_unregister(amanda_helper, - ARRAY_SIZE(amanda_helper)); + nf_conntrack_helpers_unregister(amanda_helper_ptr, + ARRAY_SIZE(amanda_helper_ptr)); for (i = 0; i < ARRAY_SIZE(search); i++) textsearch_destroy(search[i].ts); } @@ -217,8 +197,17 @@ static int __init nf_conntrack_amanda_init(void) goto err1; } } + + nf_ct_helper_init(&amanda_helper[0], AF_INET, IPPROTO_UDP, + 
HELPER_NAME, 10080, 10080, 10080, + &amanda_exp_policy, 0, amanda_help, NULL, THIS_MODULE); + nf_ct_helper_init(&amanda_helper[1], AF_INET6, IPPROTO_UDP, + HELPER_NAME, 10080, 10080, 10080, + &amanda_exp_policy, 0, amanda_help, NULL, THIS_MODULE); + ret = nf_conntrack_helpers_register(amanda_helper, - ARRAY_SIZE(amanda_helper)); + ARRAY_SIZE(amanda_helper), + amanda_helper_ptr); if (ret < 0) goto err1; return 0; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index dc6f0017ca6b..c7777f37371a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -555,6 +555,7 @@ static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct) } static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly; +static struct nf_conntrack_helper *ftp_ptr[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -563,7 +564,7 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = { static void __exit nf_conntrack_ftp_fini(void) { - nf_conntrack_helpers_unregister(ftp, ports_c * 2); + nf_conntrack_helpers_unregister(ftp_ptr, ports_c * 2); } static int __init nf_conntrack_ftp_init(void) @@ -588,7 +589,7 @@ static int __init nf_conntrack_ftp_init(void) nf_ct_ftp_from_nlattr, THIS_MODULE); } - ret = nf_conntrack_helpers_register(ftp, ports_c * 2); + ret = nf_conntrack_helpers_register(ftp, ports_c * 2, ftp_ptr); if (ret < 0) { pr_err("failed to register helpers\n"); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index b2fe6554b9cf..ebae9fdab897 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -577,14 +577,8 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { .timeout = 240, }; -static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { - .name = "H.245", - .me = THIS_MODULE, - .tuple.src.l3num = 
AF_UNSPEC, - .tuple.dst.protonum = IPPROTO_UDP, - .help = h245_help, - .expect_policy = &h245_exp_policy, -}; +static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly; +static struct nf_conntrack_helper *nf_conntrack_helper_h245_ptr __read_mostly; int get_h225_addr(struct nf_conn *ct, unsigned char *data, TransportAddress *taddr, @@ -643,7 +637,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_h245_ptr); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -732,6 +726,9 @@ static int callforward_do_filter(struct net *net, } +static struct nf_conntrack_helper nf_conntrack_helper_q931[2] __read_mostly; +static struct nf_conntrack_helper *nf_conntrack_helper_q931_ptr[2] __read_mostly; + static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -767,7 +764,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931_ptr[0]); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1140,27 +1137,6 @@ static const struct nf_conntrack_expect_policy q931_exp_policy = { .timeout = 240, }; -static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { - { - .name = "Q.931", - .me = THIS_MODULE, - .tuple.src.l3num = AF_INET, - .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), - .tuple.dst.protonum = IPPROTO_TCP, - .help = q931_help, - .expect_policy = &q931_exp_policy, - }, - { - .name = "Q.931", - .me = 
THIS_MODULE, - .tuple.src.l3num = AF_INET6, - .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), - .tuple.dst.protonum = IPPROTO_TCP, - .help = q931_help, - .expect_policy = &q931_exp_policy, - }, -}; - static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, int *datalen) { @@ -1234,7 +1210,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931_ptr[0]); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1275,6 +1251,9 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, return 0; } +static struct nf_conntrack_helper nf_conntrack_helper_ras[2] __read_mostly; +static struct nf_conntrack_helper *nf_conntrack_helper_ras_ptr[2] __read_mostly; + static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1306,7 +1285,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras_ptr[0]); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1502,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931_ptr[0]); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1556,7 @@ static 
int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931_ptr[0]); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1711,59 +1690,57 @@ static const struct nf_conntrack_expect_policy ras_exp_policy = { .timeout = 240, }; -static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { - { - .name = "RAS", - .me = THIS_MODULE, - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), - .tuple.dst.protonum = IPPROTO_UDP, - .help = ras_help, - .expect_policy = &ras_exp_policy, - }, - { - .name = "RAS", - .me = THIS_MODULE, - .tuple.src.l3num = AF_INET6, - .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), - .tuple.dst.protonum = IPPROTO_UDP, - .help = ras_help, - .expect_policy = &ras_exp_policy, - }, -}; - static int __init h323_helper_init(void) { int ret; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); + nf_ct_helper_init(&nf_conntrack_helper_ras[0], AF_INET, IPPROTO_UDP, + "RAS", RAS_PORT, RAS_PORT, RAS_PORT, + &ras_exp_policy, 0, ras_help, NULL, THIS_MODULE); + nf_ct_helper_init(&nf_conntrack_helper_ras[1], AF_INET6, IPPROTO_UDP, + "RAS", RAS_PORT, RAS_PORT, RAS_PORT, + &ras_exp_policy, 0, ras_help, NULL, THIS_MODULE); + nf_ct_helper_init(&nf_conntrack_helper_h245, AF_UNSPEC, IPPROTO_UDP, + "H.245", 0, 0, 0, + &h245_exp_policy, 0, h245_help, NULL, THIS_MODULE); + nf_ct_helper_init(&nf_conntrack_helper_q931[0], AF_INET, IPPROTO_TCP, + "Q.931", Q931_PORT, Q931_PORT, Q931_PORT, + &q931_exp_policy, 0, q931_help, NULL, THIS_MODULE); + nf_ct_helper_init(&nf_conntrack_helper_q931[1], AF_INET6, IPPROTO_TCP, + "Q.931", Q931_PORT, Q931_PORT, Q931_PORT, + &q931_exp_policy, 0, q931_help, NULL, THIS_MODULE); + + ret = 
nf_conntrack_helper_register(&nf_conntrack_helper_h245, + &nf_conntrack_helper_h245_ptr); if (ret < 0) return ret; ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931, - ARRAY_SIZE(nf_conntrack_helper_q931)); + ARRAY_SIZE(nf_conntrack_helper_q931), + nf_conntrack_helper_q931_ptr); if (ret < 0) goto err1; ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras, - ARRAY_SIZE(nf_conntrack_helper_ras)); + ARRAY_SIZE(nf_conntrack_helper_ras), + nf_conntrack_helper_ras_ptr); if (ret < 0) goto err2; return 0; err2: - nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, - ARRAY_SIZE(nf_conntrack_helper_q931)); + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931_ptr, + ARRAY_SIZE(nf_conntrack_helper_q931_ptr)); err1: - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + nf_conntrack_helper_unregister(nf_conntrack_helper_h245_ptr); return ret; } static void __exit h323_helper_exit(void) { - nf_conntrack_helpers_unregister(nf_conntrack_helper_ras, + nf_conntrack_helpers_unregister(nf_conntrack_helper_ras_ptr, ARRAY_SIZE(nf_conntrack_helper_ras)); - nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931_ptr, ARRAY_SIZE(nf_conntrack_helper_q931)); - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + nf_conntrack_helper_unregister(nf_conntrack_helper_h245_ptr); } static void __exit nf_conntrack_h323_fini(void) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 17e971bd4c74..ce2d59331dfb 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -347,14 +347,13 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_ct_helper_log); -int nf_conntrack_helper_register(struct nf_conntrack_helper *me) +int __nf_conntrack_helper_register(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = 
helper_hash(&me->tuple); struct nf_conntrack_helper *cur; int ret = 0, i; - BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); @@ -394,6 +393,33 @@ out: mutex_unlock(&nf_ct_helper_mutex); return ret; } +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_register); + +int nf_conntrack_helper_register(struct nf_conntrack_helper *me, + struct nf_conntrack_helper **helper_ptr) +{ + struct nf_conntrack_helper *new_helper; + int err; + + new_helper = kzalloc_obj(*new_helper, GFP_KERNEL_ACCOUNT); + if (!new_helper) + return -ENOMEM; + + memcpy(new_helper, me, sizeof(*new_helper)); + *helper_ptr = new_helper; + + err = __nf_conntrack_helper_register(new_helper); + if (err < 0) + goto err_helper; + + return 0; + +err_helper: + *helper_ptr = NULL; + kfree(new_helper); + + return err; +} EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) @@ -430,6 +456,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) * last step, this ensures rcu readers of exp->helper are done. * No need for another synchronize_rcu() here. 
*/ + kfree(me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); @@ -445,11 +472,12 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, struct nf_conn *ct), struct module *module) { + memset(helper, 0, sizeof(*helper)); + helper->tuple.src.l3num = l3num; helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); - helper->expect_policy = exp_pol; - helper->expect_class_max = expect_class_max; + helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; @@ -460,34 +488,57 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, snprintf(helper->name, sizeof(helper->name), "%s", name); else snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); + + if (WARN_ON_ONCE(expect_class_max >= NF_CT_MAX_EXPECT_CLASSES)) + return; + + memcpy(helper->expect_policy, exp_pol, + (expect_class_max + 1) * sizeof(*exp_pol)); + helper->expect_class_max = expect_class_max; } EXPORT_SYMBOL_GPL(nf_ct_helper_init); int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, - unsigned int n) + unsigned int n, struct nf_conntrack_helper **helper_ptr) { + struct nf_conntrack_helper *new_helper; unsigned int i; int err = 0; for (i = 0; i < n; i++) { - err = nf_conntrack_helper_register(&helper[i]); - if (err < 0) + new_helper = kzalloc_obj(*new_helper, GFP_KERNEL_ACCOUNT); + if (!new_helper) { + err = -ENOMEM; goto err; + } + + memcpy(new_helper, &helper[i], sizeof(*new_helper)); + helper_ptr[i] = new_helper; + + err = __nf_conntrack_helper_register(new_helper); + if (err < 0) { + helper_ptr[i] = NULL; + goto err_helper; + } } return err; +err_helper: + kfree(new_helper); err: if (i > 0) - nf_conntrack_helpers_unregister(helper, i); + nf_conntrack_helpers_unregister(helper_ptr, i); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); -void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, - unsigned int n) +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper **helper, + 
unsigned int n) { - while (n-- > 0) - nf_conntrack_helper_unregister(&helper[n]); + while (n-- > 0) { + nf_conntrack_helper_unregister(helper[n]); + helper[n] = NULL; + } } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 4d539657d4cb..0c117b8492e9 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -255,6 +255,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; +static struct nf_conntrack_helper *irc_ptr[MAX_PORTS] __read_mostly; static struct nf_conntrack_expect_policy irc_exp_policy; static int __init nf_conntrack_irc_init(void) @@ -289,7 +290,7 @@ static int __init nf_conntrack_irc_init(void) 0, help, NULL, THIS_MODULE); } - ret = nf_conntrack_helpers_register(&irc[0], ports_c); + ret = nf_conntrack_helpers_register(&irc[0], ports_c, irc_ptr); if (ret) { pr_err("failed to register helpers\n"); kfree(irc_buffer); @@ -301,7 +302,7 @@ static int __init nf_conntrack_irc_init(void) static void __exit nf_conntrack_irc_fini(void) { - nf_conntrack_helpers_unregister(irc, ports_c); + nf_conntrack_helpers_unregister(irc_ptr, ports_c); kfree(irc_buffer); } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 55415f011943..89d1cf7d6512 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -44,27 +44,25 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, return nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout); } -static struct nf_conntrack_helper helper __read_mostly = { - .name = HELPER_NAME, - .tuple.src.l3num = NFPROTO_IPV4, - .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), - .tuple.dst.protonum = IPPROTO_UDP, - .me = THIS_MODULE, - .help = netbios_ns_help, - .expect_policy = &exp_policy, -}; +static struct nf_conntrack_helper helper __read_mostly; 
+static struct nf_conntrack_helper *helper_ptr __read_mostly; static int __init nf_conntrack_netbios_ns_init(void) { NF_CT_HELPER_BUILD_BUG_ON(0); exp_policy.timeout = timeout; - return nf_conntrack_helper_register(&helper); + + nf_ct_helper_init(&helper, AF_INET, IPPROTO_UDP, HELPER_NAME, + NMBD_PORT, NMBD_PORT, NMBD_PORT, + &exp_policy, 0, netbios_ns_help, NULL, THIS_MODULE); + + return nf_conntrack_helper_register(&helper, &helper_ptr); } static void __exit nf_conntrack_netbios_ns_fini(void) { - nf_conntrack_helper_unregister(&helper); + nf_conntrack_helper_unregister(helper_ptr); } module_init(nf_conntrack_netbios_ns_init); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index dc23e4181618..edc85a3eef1e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -582,27 +582,25 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { }; /* control protocol helper */ -static struct nf_conntrack_helper pptp __read_mostly = { - .name = "pptp", - .me = THIS_MODULE, - .tuple.src.l3num = AF_INET, - .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), - .tuple.dst.protonum = IPPROTO_TCP, - .help = conntrack_pptp_help, - .destroy = pptp_destroy_siblings, - .expect_policy = &pptp_exp_policy, -}; +static struct nf_conntrack_helper pptp __read_mostly; +static struct nf_conntrack_helper *pptp_ptr __read_mostly; static int __init nf_conntrack_pptp_init(void) { NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master)); - return nf_conntrack_helper_register(&pptp); + nf_ct_helper_init(&pptp, AF_INET, IPPROTO_TCP, + "pptp", PPTP_CONTROL_PORT, PPTP_CONTROL_PORT, PPTP_CONTROL_PORT, + &pptp_exp_policy, 0, conntrack_pptp_help, NULL, THIS_MODULE); + + pptp.destroy = pptp_destroy_siblings; + + return nf_conntrack_helper_register(&pptp, &pptp_ptr); } static void __exit nf_conntrack_pptp_fini(void) { - nf_conntrack_helper_unregister(&pptp); + nf_conntrack_helper_unregister(pptp_ptr); } 
module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 13dc421fc4f5..a7f7b07ba0c2 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -167,6 +167,7 @@ static int help(struct sk_buff *skb, } static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly; +static struct nf_conntrack_helper *sane_ptr[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -175,7 +176,7 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = { static void __exit nf_conntrack_sane_fini(void) { - nf_conntrack_helpers_unregister(sane, ports_c * 2); + nf_conntrack_helpers_unregister(sane_ptr, ports_c * 2); } static int __init nf_conntrack_sane_init(void) @@ -200,7 +201,7 @@ static int __init nf_conntrack_sane_init(void) THIS_MODULE); } - ret = nf_conntrack_helpers_register(sane, ports_c * 2); + ret = nf_conntrack_helpers_register(sane, ports_c * 2, sane_ptr); if (ret < 0) { pr_err("failed to register helpers\n"); return ret; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index e69941f1a101..2c78a3e1dab5 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1731,6 +1731,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly; +static struct nf_conntrack_helper *sip_ptr[MAX_PORTS * 4] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1757,7 +1758,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 static void __exit nf_conntrack_sip_fini(void) { - nf_conntrack_helpers_unregister(sip, ports_c * 4); + nf_conntrack_helpers_unregister(sip_ptr, ports_c * 4); } static int __init nf_conntrack_sip_init(void) @@ -1788,7 +1789,7 @@ static int __init 
nf_conntrack_sip_init(void) NULL, THIS_MODULE); } - ret = nf_conntrack_helpers_register(sip, ports_c * 4); + ret = nf_conntrack_helpers_register(sip, ports_c * 4, sip_ptr); if (ret < 0) { pr_err("failed to register helpers\n"); return ret; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 7b7eed43c54f..b6fce5703fce 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -47,25 +47,24 @@ static struct nf_conntrack_expect_policy exp_policy = { .max_expected = 1, }; -static struct nf_conntrack_helper helper __read_mostly = { - .name = "snmp", - .tuple.src.l3num = NFPROTO_IPV4, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, - .me = THIS_MODULE, - .help = snmp_conntrack_help, - .expect_policy = &exp_policy, -}; +static struct nf_conntrack_helper helper __read_mostly; +static struct nf_conntrack_helper *helper_ptr __read_mostly; static int __init nf_conntrack_snmp_init(void) { exp_policy.timeout = timeout; - return nf_conntrack_helper_register(&helper); + + nf_ct_helper_init(&helper, AF_INET, IPPROTO_UDP, + "snmp", SNMP_PORT, SNMP_PORT, SNMP_PORT, + &exp_policy, 0, snmp_conntrack_help, NULL, + THIS_MODULE); + + return nf_conntrack_helper_register(&helper, &helper_ptr); } static void __exit nf_conntrack_snmp_fini(void) { - nf_conntrack_helper_unregister(&helper); + nf_conntrack_helper_unregister(helper_ptr); } module_init(nf_conntrack_snmp_init); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index a2e6833a0bf7..4393c435aa35 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -96,6 +96,7 @@ static int tftp_help(struct sk_buff *skb, } static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly; +static struct nf_conntrack_helper *tftp_ptr[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -104,7 +105,7 @@ static 
const struct nf_conntrack_expect_policy tftp_exp_policy = { static void __exit nf_conntrack_tftp_fini(void) { - nf_conntrack_helpers_unregister(tftp, ports_c * 2); + nf_conntrack_helpers_unregister(tftp_ptr, ports_c * 2); } static int __init nf_conntrack_tftp_init(void) @@ -127,7 +128,7 @@ static int __init nf_conntrack_tftp_init(void) THIS_MODULE); } - ret = nf_conntrack_helpers_register(tftp, ports_c * 2); + ret = nf_conntrack_helpers_register(tftp, ports_c * 2, tftp_ptr); if (ret < 0) { pr_err("failed to register helpers\n"); return ret; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 267eac1167f3..338515697c91 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,7 +32,7 @@ MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); struct nfnl_cthelper { struct list_head list; - struct nf_conntrack_helper helper; + struct nf_conntrack_helper *helper; }; static LIST_HEAD(nfnl_cthelper_list); @@ -176,7 +176,6 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, const struct nlattr *attr) { int i, ret; - struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; unsigned int class_max; @@ -195,26 +194,19 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; - expect_policy = kzalloc_objs(struct nf_conntrack_expect_policy, - class_max); - if (expect_policy == NULL) - return -ENOMEM; - for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; - ret = nfnl_cthelper_expect_policy(&expect_policy[i], + ret = nfnl_cthelper_expect_policy(&helper->expect_policy[i], tb[NFCTH_POLICY_SET+i]); if (ret < 0) goto err; } helper->expect_class_max = class_max - 1; - helper->expect_policy = expect_policy; return 0; err: - kfree(expect_policy); return -EINVAL; } @@ -230,21 +222,28 @@ nfnl_cthelper_create(const struct nlattr * const 
tb[], if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - nfcth = kzalloc_obj(*nfcth); + nfcth = kzalloc_obj(*nfcth, GFP_KERNEL_ACCOUNT); if (nfcth == NULL) return -ENOMEM; - helper = &nfcth->helper; + + helper = kzalloc_obj(*helper, GFP_KERNEL_ACCOUNT); + if (!helper) { + ret = -ENOMEM; + goto err_cth; + } + + nfcth->helper = helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err1; + goto err_helper; nla_strscpy(helper->name, tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > sizeof_field(struct nf_conn_help, data)) { ret = -ENOMEM; - goto err2; + goto err_helper; } helper->data_len = size; @@ -273,15 +272,15 @@ nfnl_cthelper_create(const struct nlattr * const tb[], } } - ret = nf_conntrack_helper_register(helper); + ret = __nf_conntrack_helper_register(helper); if (ret < 0) - goto err2; + goto err_helper; list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; -err2: - kfree(helper->expect_policy); -err1: +err_helper: + kfree(helper); +err_cth: kfree(nfcth); return ret; } @@ -439,7 +438,7 @@ static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info, return ret; list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { - cur = &nlcth->helper; + cur = nlcth->helper; if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; @@ -650,7 +649,7 @@ static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info, } list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { - cur = &nlcth->helper; + cur = nlcth->helper; if (helper_name && strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; @@ -708,7 +707,7 @@ static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info, ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { - cur = &nlcth->helper; + cur = nlcth->helper; j++; if (helper_name && @@ -723,7 +722,6 @@ static int 
nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info, if (refcount_dec_if_one(&cur->refcnt)) { found = true; nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); list_del(&nlcth->list); kfree(nlcth); @@ -796,10 +794,9 @@ static void __exit nfnl_cthelper_exit(void) nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { - cur = &nlcth->helper; + cur = nlcth->helper; nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); kfree(nlcth); } } -- cgit v1.2.3 From fe97fd540a03034a780224f24b0b2f0e21c9c763 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jun 2026 08:21:10 +0200 Subject: netfilter: nf_conntrack_pptp: move GRE specific cleanup to GRE tracker Move the GRE specific cleanup to nf_conntrack_proto_gre.c to ensure that the .destroy callback for the pptp helper is still reachable by existing conntrack entries while pptp module is being removed. This is a preparation patch, no functional changes are intended. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 4 ++ net/netfilter/nf_conntrack_pptp.c | 63 +------------------------- net/netfilter/nf_conntrack_proto_gre.c | 61 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index b39417ad955e..0b07d5e69c15 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -20,4 +20,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK_PPTP) +void gre_pptp_destroy_siblings(struct nf_conn *ct); +#endif + #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index edc85a3eef1e..ed567a1cf7fd 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -124,65 +124,6 @@ static void pptp_expectfn(struct nf_conn *ct, } } -static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, - const struct nf_conntrack_tuple *t) -{ - const struct nf_conntrack_tuple_hash *h; - const struct nf_conntrack_zone *zone; - struct nf_conntrack_expect *exp; - struct nf_conn *sibling; - - pr_debug("trying to timeout ct or exp for tuple "); - nf_ct_dump_tuple(t); - - zone = nf_ct_zone(ct); - h = nf_conntrack_find_get(net, zone, t); - if (h) { - sibling = nf_ct_tuplehash_to_ctrack(h); - pr_debug("setting timeout of conntrack %p to 0\n", sibling); - sibling->proto.gre.timeout = 0; - sibling->proto.gre.stream_timeout = 0; - nf_ct_kill(sibling); - nf_ct_put(sibling); - return 1; - } else { - exp = nf_ct_expect_find_get(net, zone, t); - if (exp) { - pr_debug("unexpect_related of expect %p\n", exp); - nf_ct_unexpect_related(exp); - nf_ct_expect_put(exp); - return 1; - } - } - return 0; -} - -/* 
timeout GRE data connections */ -static void pptp_destroy_siblings(struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); - struct nf_conntrack_tuple t; - - nf_ct_gre_keymap_destroy(ct); - - /* try original (pns->pac) tuple */ - memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); - t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = ct_pptp_info->pns_call_id; - t.dst.u.gre.key = ct_pptp_info->pac_call_id; - if (!destroy_sibling_or_exp(net, ct, &t)) - pr_debug("failed to timeout original pns->pac ct/exp\n"); - - /* try reply (pac->pns) tuple */ - memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); - t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = ct_pptp_info->pac_call_id; - t.dst.u.gre.key = ct_pptp_info->pns_call_id; - if (!destroy_sibling_or_exp(net, ct, &t)) - pr_debug("failed to timeout reply pac->pns ct/exp\n"); -} - /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) { @@ -343,7 +284,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); + gre_pptp_destroy_siblings(ct); break; case PPTP_WAN_ERROR_NOTIFY: @@ -593,7 +534,7 @@ static int __init nf_conntrack_pptp_init(void) "pptp", PPTP_CONTROL_PORT, PPTP_CONTROL_PORT, PPTP_CONTROL_PORT, &pptp_exp_policy, 0, conntrack_pptp_help, NULL, THIS_MODULE); - pptp.destroy = pptp_destroy_siblings; + pptp.destroy = gre_pptp_destroy_siblings; return nf_conntrack_helper_register(&pptp, &pptp_ptr); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 35e22082d65a..473658259f1a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -349,6 +349,67 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_PPTP) +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, + const struct nf_conntrack_tuple *t) +{ + const struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_zone *zone; + struct nf_conntrack_expect *exp; + struct nf_conn *sibling; + + pr_debug("trying to timeout ct or exp for tuple "); + nf_ct_dump_tuple(t); + + zone = nf_ct_zone(ct); + h = nf_conntrack_find_get(net, zone, t); + if (h) { + sibling = nf_ct_tuplehash_to_ctrack(h); + pr_debug("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + nf_ct_kill(sibling); + nf_ct_put(sibling); + return 1; + } else { + exp = nf_ct_expect_find_get(net, zone, t); + if (exp) { + pr_debug("unexpect_related of expect %p\n", exp); + nf_ct_unexpect_related(exp); + nf_ct_expect_put(exp); + return 1; + } + } + return 0; +} + +void gre_pptp_destroy_siblings(struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); + struct nf_conntrack_tuple t; + + nf_ct_gre_keymap_destroy(ct); + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.u.gre.key = ct_pptp_info->pac_call_id; + if (!destroy_sibling_or_exp(net, ct, &t)) + pr_debug("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; + if (!destroy_sibling_or_exp(net, ct, &t)) + pr_debug("failed to timeout reply pac->pns ct/exp\n"); +} +EXPORT_SYMBOL_GPL(gre_pptp_destroy_siblings); +#endif + void nf_conntrack_gre_init_net(struct net *net) { struct nf_gre_net *net_gre = gre_pernet(net); -- cgit v1.2.3 From 
ac46f3f35b6e68fb062ae7cf780d516c0cf4c00a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jun 2026 08:21:11 +0200 Subject: netfilter: nf_conntrack_helper: add refcounting from datapath This patch adds a new ->ct_refcnt field to struct nf_conntrack_helper which is bumped when the helper is used by the ct helper extension. Drop this reference count when the conntrack entry is released. This is a packet path refcount which ensures that struct nf_conntrack_helper remains in place for tricky scenarios where a packet sits in nfqueue, or elsewhere, with a conntrack that refers to this helper. For simplicity, this leaves a single refcount for helper objects in place and removes the existing control plane refcount that ensured that the helper does not go away while it is used by the ruleset. On helper removal, the help callback is set to NULL to disable it from the packet path and, after an RCU grace period, existing expectations are removed. Update ctnetlink to disable access to .to_nlattr and .from_nlattr if the helper is going away. Remove nf_queue_nf_hook_drop() since it has proven not to be effective: packets with unconfirmed conntracks may still be in flight and end up sitting in nfqueue.
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 26 ++++++++++++--- net/netfilter/nf_conntrack_core.c | 3 +- net/netfilter/nf_conntrack_helper.c | 52 +++++++++++++---------------- net/netfilter/nf_conntrack_netlink.c | 28 ++++++++++------ net/netfilter/nf_conntrack_ovs.c | 9 ++++- net/netfilter/nf_conntrack_proto.c | 15 ++++++--- net/netfilter/nfnetlink_cthelper.c | 14 +++----- net/netfilter/nft_ct.c | 3 +- net/netfilter/xt_CT.c | 3 -- 9 files changed, 89 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1956bc12bf56..ed93a5a1adc8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -35,20 +35,22 @@ enum nf_ct_helper_flags { struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ + struct rcu_head rcu; + char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ - refcount_t refcnt; struct module *me; /* pointer to self */ struct nf_conntrack_expect_policy expect_policy[NF_CT_MAX_EXPECT_CLASSES]; + refcount_t ct_refcnt; + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; /* Function to call when data passes; return verdict, or -1 to invalidate. 
*/ - int (*help)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info conntrackinfo); + int __rcu (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info conntrackinfo); void (*destroy)(struct nf_conn *ct); @@ -138,6 +140,20 @@ static inline void *nfct_help_data(const struct nf_conn *ct) return (void *)help->data; } +static inline void nf_ct_help_put(const struct nf_conn *ct) +{ + struct nf_conntrack_helper *helper; + struct nf_conn_help *help; + + help = nfct_help(ct); + if (!help) + return; + + helper = rcu_dereference(help->helper); + if (helper && refcount_dec_and_test(&helper->ct_refcnt)) + kfree_rcu(helper, rcu); +} + int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a45b73239369..7c135fe3dd03 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1746,6 +1746,7 @@ void nf_conntrack_free(struct nf_conn *ct) nat_hook->remove_nat_bysrc(ct); } + nf_ct_help_put(ct); nf_ct_timeout_put(ct); rcu_read_unlock(); @@ -1829,7 +1830,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, assign_helper = rcu_dereference(exp->assign_helper); if (assign_helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help) + if (help && refcount_inc_not_zero(&assign_helper->ct_refcnt)) rcu_assign_pointer(help->helper, assign_helper); } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ce2d59331dfb..83dfdb06bfdd 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -92,7 +92,7 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; - if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + if (h != NULL && !refcount_inc_not_zero(&h->ct_refcnt)) { module_put(h->me); h = NULL; } @@ -105,8 
+105,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { - refcount_dec(&helper->refcnt); module_put(helper->me); + if (refcount_dec_and_test(&helper->ct_refcnt)) + kfree_rcu(helper, rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); @@ -210,8 +211,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, help = nfct_help(ct); if (helper == NULL) { - if (help) + if (help) { + struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); + RCU_INIT_POINTER(help->helper, NULL); + if (tmp && refcount_dec_and_test(&tmp->ct_refcnt)) + kfree_rcu(tmp, rcu); + } return 0; } @@ -225,32 +231,23 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, */ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); - if (tmp && tmp->help != helper->help) { - RCU_INIT_POINTER(help->helper, NULL); + if (tmp) { + if (tmp->help != helper->help) { + RCU_INIT_POINTER(help->helper, NULL); + if (refcount_dec_and_test(&tmp->ct_refcnt)) + kfree_rcu(tmp, rcu); + } return 0; } } - rcu_assign_pointer(help->helper, helper); + if (refcount_inc_not_zero(&helper->ct_refcnt)) + rcu_assign_pointer(help->helper, helper); return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); -/* appropriate ct lock protecting must be taken by caller */ -static int unhelp(struct nf_conn *ct, void *me) -{ - struct nf_conn_help *help = nfct_help(ct); - - if (help && rcu_dereference_raw(help->helper) == me) { - nf_conntrack_event(IPCT_HELPER, ct); - RCU_INIT_POINTER(help->helper, NULL); - } - - /* We are not intended to delete this conntrack. 
*/ - return 0; -} - void nf_ct_helper_destroy(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); @@ -386,7 +383,7 @@ int __nf_conntrack_helper_register(struct nf_conntrack_helper *me) } } } - refcount_set(&me->refcnt, 1); + refcount_set(&me->ct_refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: @@ -444,19 +441,18 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_helper_count--; mutex_unlock(&nf_ct_helper_mutex); + /* This helper is going away, disable it. */ + rcu_assign_pointer(me->help, NULL); + /* Make sure every nothing is still using the helper unless its a * connection in the hash. */ synchronize_rcu(); nf_ct_expect_iterate_destroy(expect_iter_me, me); - nf_ct_iterate_destroy(unhelp, me); - /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as - * last step, this ensures rcu readers of exp->helper are done. - * No need for another synchronize_rcu() here. - */ - kfree(me); + if (refcount_dec_and_test(&me->ct_refcnt)) + kfree_rcu(me, rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); @@ -478,7 +474,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); - helper->help = help; + rcu_assign_pointer(helper->help, help); helper->from_nlattr = from_nlattr; helper->me = module; snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d429f9c9546c..b429e648f06c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -240,7 +240,8 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (nla_put_string(skb, CTA_HELP_NAME, helper->name)) goto nla_put_failure; - if (helper->to_nlattr) + if (rcu_access_pointer(helper->help) && + helper->to_nlattr) helper->to_nlattr(skb, ct); nla_nest_end(skb, nest_helper); @@ -1935,6 +1936,7 @@ static 
int ctnetlink_change_helper(struct nf_conn *ct, if (err < 0) return err; + rcu_read_lock(); /* don't change helper of sibling connections */ if (ct->master) { /* If we try to change the helper to the same thing twice, @@ -1943,27 +1945,27 @@ static int ctnetlink_change_helper(struct nf_conn *ct, */ err = -EBUSY; if (help) { - rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && !strcmp(helper->name, helpname)) err = 0; - rcu_read_unlock(); } - + rcu_read_unlock(); return err; } - if (!strcmp(helpname, "")) { - if (help && help->helper) { + if (!strcmp(helpname, "") && help) { + helper = rcu_dereference(help->helper); + if (helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); RCU_INIT_POINTER(help->helper, NULL); + if (refcount_dec_and_test(&helper->ct_refcnt)) + kfree_rcu(helper, rcu); } - + rcu_read_unlock(); return 0; } - rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); if (helper == NULL) { @@ -1974,7 +1976,8 @@ static int ctnetlink_change_helper(struct nf_conn *ct, if (help) { if (rcu_access_pointer(help->helper) == helper) { /* update private helper data if allowed. */ - if (helper->from_nlattr) + if (rcu_access_pointer(helper->help) && + helper->from_nlattr) helper->from_nlattr(helpinfo, ct); err = 0; } else @@ -2289,11 +2292,16 @@ ctnetlink_create_conntrack(struct net *net, goto err2; } /* set private helper data if allowed. 
*/ - if (helper->from_nlattr) + if (rcu_access_pointer(helper->help) && + helper->from_nlattr) helper->from_nlattr(helpinfo, ct); /* disable helper auto-assignment for this entry */ ct->status |= IPS_HELPER; + if (!refcount_inc_not_zero(&helper->ct_refcnt)) { + err = -ENOENT; + goto err2; + } RCU_INIT_POINTER(help->helper, helper); } } diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index a6988eeb1579..49d1511e9921 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -12,6 +12,9 @@ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u16 proto) { + int (*helper_cb)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info conntrackinfo); const struct nf_conntrack_helper *helper; const struct nf_conn_help *help; unsigned int protoff; @@ -60,7 +63,11 @@ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, if (helper->tuple.dst.protonum != proto) return NF_ACCEPT; - err = helper->help(skb, protoff, ct, ctinfo); + helper_cb = rcu_dereference(helper->help); + if (!helper_cb) + return NF_ACCEPT; + + err = helper_cb(skb, protoff, ct, ctinfo); if (err != NF_ACCEPT) return err; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 50ddd3d613e1..ad96896516b6 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -129,6 +129,9 @@ unsigned int nf_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + int (*helper_cb)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info conntrackinfo); const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; unsigned int protoff; @@ -175,11 +178,13 @@ unsigned int nf_confirm(void *priv, /* rcu_read_lock()ed by nf_hook */ helper = rcu_dereference(help->helper); if (helper) { - ret = helper->help(skb, - protoff, - ct, ctinfo); - if (ret != NF_ACCEPT) - return ret; + 
helper_cb = rcu_dereference(helper->help); + if (helper_cb) { + ret = helper_cb(skb, protoff, + ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } } } diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 338515697c91..033ea90c4401 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -719,15 +719,11 @@ static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - if (refcount_dec_if_one(&cur->refcnt)) { - found = true; - nf_conntrack_helper_unregister(cur); - - list_del(&nlcth->list); - kfree(nlcth); - } else { - ret = -EBUSY; - } + found = true; + nf_conntrack_helper_unregister(cur); + + list_del(&nlcth->list); + kfree(nlcth); } /* Make sure we return success if we flush and there is no helpers */ diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 801c01c6af95..9fe179d688da 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1101,7 +1101,6 @@ static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, { struct nft_ct_helper_obj *priv = nft_obj_data(obj); - nf_queue_nf_hook_drop(ctx->net); if (priv->helper4) nf_conntrack_helper_put(priv->helper4); if (priv->helper6) @@ -1144,7 +1143,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj, return; help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help) { + if (help && refcount_inc_not_zero(&to_assign->ct_refcnt)) { rcu_assign_pointer(help->helper, to_assign); set_bit(IPS_HELPER_BIT, &ct->status); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b94f004d5f5c..e78660dfdf4b 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -284,9 +284,6 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn_help *help; if (ct) { - if (info->helper[0]) - nf_queue_nf_hook_drop(par->net); - help = nfct_help(ct); xt_ct_put_helper(help); -- cgit v1.2.3 From 
35e21a4dccc5c255ba59ccfbfeb4629ed21da972 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jun 2026 08:21:12 +0200 Subject: netfilter: conntrack: revert ct extension genid infrastructure This infrastructure is not used anymore after moving ct timeout and helper to use datapath refcount to track object use. Revert commit c56716c69ce1 ("netfilter: extensions: introduce extension genid count"): that commit disables all ct extensions (leading to NULL) for unconfirmed conntracks, although it is only targeted at ct helper and ct timeout. There is also code in the codebase that dereferences the ct extension without checking for NULL, which could lead to a crash. Fixes: c56716c69ce1 ("netfilter: extensions: introduce extension genid count") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 12 ------ net/netfilter/nf_conntrack_core.c | 61 +---------------------------- net/netfilter/nf_conntrack_extend.c | 32 +-------------- 3 files changed, 2 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 0b247248b032..fd5c4dbf72ca 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -38,7 +38,6 @@ enum nf_ct_ext_id { struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - unsigned int gen_id; char data[] __aligned(8); }; @@ -52,8 +51,6 @@ static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); } -void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); - static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) { struct nf_ct_ext *ext = ct->ext; @@ -61,19 +58,10 @@ static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) if (!ext || !__nf_ct_ext_exist(ext, id)) return NULL; - if (unlikely(ext->gen_id)) - return __nf_ct_ext_find(ext, id); - return (void *)ct->ext + ct->ext->offset[id]; } /* Add this type,
returns pointer to data or NULL. */ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); -/* ext genid. if ext->id != ext_genid, extensions cannot be used - * anymore unless conntrack has CONFIRMED bit set. - */ -extern atomic_t nf_conntrack_ext_genid; -void nf_ct_ext_bump_genid(void); - #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c135fe3dd03..91255fd3b35d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -840,33 +840,6 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, &nf_conntrack_hash[reply_hash]); } -static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext) -{ - /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions - * may contain stale pointers to e.g. helper that has been removed. - * - * The helper can't clear this because the nf_conn object isn't in - * any hash and synchronize_rcu() isn't enough because associated skb - * might sit in a queue. - */ - return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid); -} - -static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext) -{ - if (!ext) - return true; - - if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid)) - return false; - - /* inserted into conntrack table, nf_ct_iterate_cleanup() - * will find it. Disable nf_ct_ext_find() id check. 
- */ - WRITE_ONCE(ext->gen_id, 0); - return true; -} - int nf_conntrack_hash_check_insert(struct nf_conn *ct) { @@ -882,9 +855,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); - if (!nf_ct_ext_valid_pre(ct->ext)) - return -EAGAIN; - local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); @@ -918,18 +888,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto chaintoolong; } - /* If genid has changed, we can't insert anymore because ct - * extensions could have stale pointers and nf_ct_iterate_destroy - * might have completed its table scan already. - * - * Increment of the ext genid right after this check is fine: - * nf_ct_iterate_destroy blocks until locks are released. - */ - if (!nf_ct_ext_valid_post(ct->ext)) { - err = -EAGAIN; - goto out; - } - smp_wmb(); /* The caller holds a reference to this object */ refcount_set(&ct->ct_general.use, 2); @@ -1257,11 +1215,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_DROP; } - if (!nf_ct_ext_valid_pre(ct->ext)) { - NF_CT_STAT_INC(net, insert_failed); - goto dying; - } - /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from * user context, else we insert an already 'dead' hash, blocking @@ -1324,16 +1277,6 @@ chaintoolong: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); - /* ext area is still valid (rcu read lock is held, - * but will go out of scope soon, we need to remove - * this conntrack again. 
- */ - if (!nf_ct_ext_valid_post(ct->ext)) { - nf_ct_kill(ct); - NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; - } - help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); @@ -2441,13 +2384,11 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) */ synchronize_net(); - nf_ct_ext_bump_genid(); iter_data.data = data; nf_ct_iterate_cleanup(iter, &iter_data); /* Another cpu might be in a rcu read section with - * rcu protected pointer cleared in iter callback - * or hidden via nf_ct_ext_bump_genid() above. + * rcu protected pointer cleared in iter callback. * * Wait until those are done. */ diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index dd62cc12e775..0da105e1ded9 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -27,8 +27,6 @@ #define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ -atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1); - static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help), #if IS_ENABLED(CONFIG_NF_NAT) @@ -118,10 +116,8 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) if (!new) return NULL; - if (!ct->ext) { + if (!ct->ext) memset(new->offset, 0, sizeof(new->offset)); - new->gen_id = atomic_read(&nf_conntrack_ext_genid); - } new->offset[id] = newoff; new->len = newlen; @@ -131,29 +127,3 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) return (void *)new + newoff; } EXPORT_SYMBOL(nf_ct_ext_add); - -/* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. 
*/ -void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id) -{ - unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid); - unsigned int this_id = READ_ONCE(ext->gen_id); - - if (!__nf_ct_ext_exist(ext, id)) - return NULL; - - if (this_id == 0 || ext->gen_id == gen_id) - return (void *)ext + ext->offset[id]; - - return NULL; -} -EXPORT_SYMBOL(__nf_ct_ext_find); - -void nf_ct_ext_bump_genid(void) -{ - unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid); - - if (value == UINT_MAX) - atomic_set(&nf_conntrack_ext_genid, 1); - - msleep(HZ); -} -- cgit v1.2.3