From 478374a3c15f369e57fdd79d64d7a1d2eb307e16 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:22 +0200 Subject: netfilter: ecache: remove one indent level nf_conntrack_eventmask_report and nf_ct_deliver_cached_events shared most of their code. This unifies the layout by changing if (nf_ct_is_confirmed(ct)) { foo } to if (!nf_ct_is_confirmed(ct))) return foo This removes one level of indentation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index d00ba6048e44..3734bacf9763 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -73,7 +73,7 @@ struct nf_ct_event { }; struct nf_ct_event_notifier { - int (*fcn)(unsigned int events, struct nf_ct_event *item); + int (*fcn)(unsigned int events, const struct nf_ct_event *item); }; int nf_conntrack_register_notifier(struct net *net, -- cgit v1.2.3 From b86c0e6429dac2458694495aeebf15f4fe6b269d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:25 +0200 Subject: netfilter: ecache: prepare for event notifier merge This prepares for merge for ct and exp notifier structs. The 'fcn' member is renamed to something unique. Second, the register/unregister api is simplified. There is only one implementation so there is no need to do any error checking. Replace the EBUSY logic with WARN_ON_ONCE. This allows to remove error unwinding. The exp notifier register/unregister function is removed in a followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 11 +++++------ net/netfilter/nf_conntrack_ecache.c | 26 ++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 22 ++++++---------------- 3 files changed, 17 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 3734bacf9763..061a93a03b82 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -73,13 +73,12 @@ struct nf_ct_event { }; struct nf_ct_event_notifier { - int (*fcn)(unsigned int events, const struct nf_ct_event *item); + int (*ct_event)(unsigned int events, const struct nf_ct_event *item); }; -int nf_conntrack_register_notifier(struct net *net, - struct nf_ct_event_notifier *nb); -void nf_conntrack_unregister_notifier(struct net *net, - struct nf_ct_event_notifier *nb); +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *nb); +void nf_conntrack_unregister_notifier(struct net *net); void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, @@ -159,7 +158,7 @@ struct nf_exp_event { }; struct nf_exp_event_notifier { - int (*fcn)(unsigned int events, struct nf_exp_event *item); + int (*exp_event)(unsigned int events, struct nf_exp_event *item); }; int nf_ct_expect_register_notifier(struct net *net, diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index fbe04e16280a..d92f78e4bc7c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -151,7 +151,7 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, return 0; } - ret = notify->fcn(events | missed, item); + ret = notify->ct_event(events | missed, item); rcu_read_unlock(); if (likely(ret >= 0 && missed == 0)) @@ -258,43 +258,29 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, .portid = portid, .report = report }; - notify->fcn(1 << event, &item); + notify->exp_event(1 << event, &item); } out_unlock: rcu_read_unlock(); } -int nf_conntrack_register_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *new) { - int ret; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } + WARN_ON_ONCE(notify); rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); - ret = 0; - -out_unlock: mutex_unlock(&nf_ct_ecache_mutex); - return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net) { - struct nf_ct_event_notifier *notify; - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); /* synchronize_rcu() is called from ctnetlink_exit. */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 43b891a902de..6d6f7cd70753 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3755,11 +3755,11 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { - .fcn = ctnetlink_conntrack_event, + .ct_event = ctnetlink_conntrack_event, }; static struct nf_exp_event_notifier ctnl_notifier_exp = { - .fcn = ctnetlink_expect_event, + .exp_event = ctnetlink_expect_event, }; #endif @@ -3854,33 +3854,23 @@ static int __net_init ctnetlink_net_init(struct net *net) #ifdef CONFIG_NF_CONNTRACK_EVENTS int ret; - ret = nf_conntrack_register_notifier(net, &ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); - goto err_out; - } + nf_conntrack_register_notifier(net, &ctnl_notifier); ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); if (ret < 0) { pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; + nf_conntrack_unregister_notifier(net); + return ret; } #endif return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(net, &ctnl_notifier); -err_out: - return ret; -#endif } static void ctnetlink_net_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); - nf_conntrack_unregister_notifier(net, &ctnl_notifier); + nf_conntrack_unregister_notifier(net); #endif } -- cgit v1.2.3 From bd1431db0b8131098a285c8cc6a357629b4362e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:26 +0200 Subject: netfilter: ecache: remove nf_exp_event_notifier structure Reuse the conntrack event notofier struct, this allows to remove the extra register/unregister functions and avoids a pointer in struct net. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 23 +++++---------- include/net/netns/conntrack.h | 1 - net/netfilter/nf_conntrack_ecache.c | 43 ++--------------------------- net/netfilter/nf_conntrack_netlink.c | 30 ++------------------ 4 files changed, 13 insertions(+), 84 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 061a93a03b82..d932e22edcb4 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -72,8 +72,15 @@ struct nf_ct_event { int report; }; +struct nf_exp_event { + struct nf_conntrack_expect *exp; + u32 portid; + int report; +}; + struct nf_ct_event_notifier { int (*ct_event)(unsigned int events, const struct nf_ct_event *item); + int (*exp_event)(unsigned int events, const struct nf_exp_event *item); }; void nf_conntrack_register_notifier(struct net *net, @@ -150,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_EVENTS - -struct nf_exp_event { - struct nf_conntrack_expect *exp; - u32 portid; - int report; -}; - -struct nf_exp_event_notifier { - int (*exp_event)(unsigned int events, struct nf_exp_event *item); -}; - -int nf_ct_expect_register_notifier(struct net *net, - struct nf_exp_event_notifier *nb); -void nf_ct_expect_unregister_notifier(struct net *net, - struct nf_exp_event_notifier *nb); - void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fefd38db95b3..0294f3d473af 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -113,7 +113,6 @@ struct netns_ct { struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; - struct nf_exp_event_notifier __rcu *nf_expect_event_cb; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d92f78e4bc7c..41768ff19464 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -240,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, { struct net *net = nf_ct_exp_net(exp); - struct nf_exp_event_notifier *notify; + struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_expect_event_cb); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) goto out_unlock; @@ -283,47 +283,10 @@ void nf_conntrack_unregister_notifier(struct net *net) mutex_lock(&nf_ct_ecache_mutex); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. */ + /* synchronize_rcu() is called after netns pre_exit */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - int ret; - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } - rcu_assign_pointer(net->ct.nf_expect_event_cb, new); - ret = 0; - -out_unlock: - mutex_unlock(&nf_ct_ecache_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); - -void nf_ct_expect_unregister_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); - mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. */ -} -EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); - void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6d6f7cd70753..5008fa0891b3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3104,7 +3104,7 @@ nla_put_failure: #ifdef CONFIG_NF_CONNTRACK_EVENTS static int -ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) +ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) { struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); @@ -3756,9 +3756,6 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { .ct_event = ctnetlink_conntrack_event, -}; - -static struct nf_exp_event_notifier ctnl_notifier_exp = { .exp_event = ctnetlink_expect_event, }; #endif @@ -3852,42 +3849,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); static int __net_init ctnetlink_net_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int ret; - nf_conntrack_register_notifier(net, &ctnl_notifier); - - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - nf_conntrack_unregister_notifier(net); - return ret; - } #endif return 0; } -static void ctnetlink_net_exit(struct net *net) +static void ctnetlink_net_pre_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); nf_conntrack_unregister_notifier(net); #endif } -static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) -{ - struct net *net; - - list_for_each_entry(net, net_exit_list, exit_list) - ctnetlink_net_exit(net); - - /* wait for other cpus until they are done with ctnl_notifiers */ - synchronize_rcu(); -} - static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, - .exit_batch = ctnetlink_net_exit_batch, + .pre_exit = ctnetlink_net_pre_exit, }; static int __init ctnetlink_init(void) -- cgit v1.2.3 From 7a3f5b0de3647c854e34269c3332d7a1e902901a Mon Sep 17 00:00:00 2001 From: Ryoga Saito Date: Tue, 17 Aug 2021 08:39:37 +0000 Subject: netfilter: add netfilter hooks to SRv6 data plane This patch introduces netfilter hooks for solving the problem that conntrack couldn't record both inner flows and outer flows. This patch also introduces a new sysctl toggle for enabling lightweight tunnel netfilter hooks. Signed-off-by: Ryoga Saito Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 7 ++ include/net/lwtunnel.h | 3 + include/net/netfilter/nf_hooks_lwtunnel.h | 7 ++ net/core/lwtunnel.c | 3 + net/ipv6/seg6_iptunnel.c | 75 ++++++++++++++- net/ipv6/seg6_local.c | 111 ++++++++++++++++------- net/netfilter/Makefile | 3 + net/netfilter/nf_conntrack_standalone.c | 15 +++ net/netfilter/nf_hooks_lwtunnel.c | 53 +++++++++++ 9 files changed, 241 insertions(+), 36 deletions(-) create mode 100644 include/net/netfilter/nf_hooks_lwtunnel.h create mode 100644 net/netfilter/nf_hooks_lwtunnel.c (limited to 'include/net') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 024d784157c8..34ca762ea56f 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds) This extended timeout will be used in case there is an GRE stream detected. +nf_hooks_lwtunnel - BOOLEAN + - 0 - disabled (default) + - not 0 - enabled + + If this option is enabled, the lightweight tunnel netfilter hooks are + enabled. This option cannot be disabled once it is enabled. + nf_flowtable_tcp_timeout - INTEGER (seconds) default 30 diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 05cfd6ff6528..6f15e6fa154e 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -51,6 +51,9 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL + +DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); + void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h new file mode 100644 index 000000000000..52e27920f829 --- /dev/null +++ b/include/net/netfilter/nf_hooks_lwtunnel.h @@ -0,0 +1,7 @@ +#include +#include + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +#endif diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index d0ae987d2de9..2820aca2173a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,9 @@ #include #include +DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); + #ifdef CONFIG_MODULES static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 897fa59c47de..6ebc7aa24466 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,6 +26,8 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif +#include +#include static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { @@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); return 0; } -static int seg6_input(struct sk_buff *skb) +static int seg6_input_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + +static int seg6_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb) if (unlikely(err)) return err; - return dst_input(skb); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, seg6_input_finish); + + return seg6_input_finish(dev_net(skb->dev), NULL, skb); } -static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_input_nf(struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(skb->dev); + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + } + + return -EINVAL; +} + +static int seg6_input(struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_input_nf(skb); + + return seg6_input_core(dev_net(skb->dev), NULL, skb); +} + +static int seg6_output_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(err)) goto drop; + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, skb_dst(skb)->dev, dst_output); + return dst_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + } + + return -EINVAL; +} + +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_output_nf(net, sk, skb); + + return seg6_output_core(net, sk, skb); +} + static int seg6_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60bf3b877957..ddc8dfcd4e2b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #define SEG6_F_ATTR(i) BIT(i) @@ -413,12 +415,33 @@ drop: return -EINVAL; } +static int input_action_end_dx6_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct in6_addr *nhaddr = NULL; + struct seg6_local_lwt *slwt; + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + seg6_lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +} + /* decapsulate and forward to specified nexthop */ static int input_action_end_dx6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct in6_addr *nhaddr = NULL; - /* this function accepts IPv6 encapsulated packets, with either * an SRH with SL=0, or no SRH. */ @@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; - /* The inner packet is not associated to any local interface, - * so we do not call netif_rx(). - * - * If slwt->nh6 is set to ::, then lookup the nexthop for the - * inner packet's DA. Otherwise, use the specified nexthop. - */ - - if (!ipv6_addr_any(&slwt->nh6)) - nhaddr = &slwt->nh6; - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); - seg6_lookup_nexthop(skb, nhaddr, 0); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx6_finish); - return dst_input(skb); + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } -static int input_action_end_dx4(struct sk_buff *skb, - struct seg6_local_lwt *slwt) +static int input_action_end_dx4_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; int err; - if (!decap_and_validate(skb, IPPROTO_IPIP)) - goto drop; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto drop; - - skb->protocol = htons(ETH_P_IP); + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); iph = ip_hdr(skb); @@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb, skb_dst_drop(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); - if (err) - goto drop; + if (err) { + kfree_skb(skb); + return -EINVAL; + } return dst_input(skb); +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + skb_set_transport_header(skb, sizeof(struct iphdr)); + nf_reset_ct(skb); + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx4_finish); + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; @@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, skb_dst_drop(skb); skb_set_transport_header(skb, hdrlen); + nf_reset_ct(skb); return end_dt_vrf_rcv(skb, family, vrf); @@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt, u64_stats_update_end(&pcounters->syncp); } -static int seg6_local_input(struct sk_buff *skb) +static int seg6_local_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct seg6_action_desc *desc; @@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb) unsigned int len = skb->len; int rc; - if (skb->protocol != htons(ETH_P_IPV6)) { - kfree_skb(skb); - return -EINVAL; - } - slwt = seg6_local_lwtunnel(orig_dst->lwtstate); desc = slwt->desc; @@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb) return rc; } +static int seg6_local_input(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + seg6_local_input_core); + + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); +} + static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 049890e00a3d..aab20e575ecd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/ # IPVS obj-$(CONFIG_IP_VS) += ipvs/ + +# lwtunnel +obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e84b499b7bfa..7e0d956da51d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_LWTUNNEL +#include +#endif #include static bool enable_hooks __read_mostly; @@ -612,6 +615,9 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif +#ifdef CONFIG_LWTUNNEL + NF_SYSCTL_CT_LWTUNNEL, +#endif __NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, +#endif +#ifdef CONFIG_LWTUNNEL + [NF_SYSCTL_CT_LWTUNNEL] = { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, #endif {} }; diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c new file mode 100644 index 000000000000..00e89ffd78f6 --- /dev/null +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +static inline int nf_hooks_lwtunnel_get(void) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return 1; + else + return 0; +} + +static inline int nf_hooks_lwtunnel_set(int enable) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { + if (!enable) + return -EBUSY; + } else if (enable) { + static_branch_enable(&nf_hooks_lwtunnel_enabled); + } + + return 0; +} + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int proc_nf_hooks_lwtunnel_enabled = 0; + struct ctl_table tmp = { + .procname = table->procname, + .data = &proc_nf_hooks_lwtunnel_enabled, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + int ret; + + if (!write) + proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); +#endif /* CONFIG_SYSCTL */ -- cgit v1.2.3