summary | refs | log | tree | commit | diff
path: root/net/sched/act_ct.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/act_ct.c')
-rw-r--r-- net/sched/act_ct.c 265
1 files changed, 113 insertions, 152 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b38d91d6b249..0ca2bb8ed026 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,6 +24,7 @@
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@@ -33,6 +34,7 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_conntrack_act_ct.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
@@ -178,7 +180,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
entry = tcf_ct_flow_table_flow_action_get_next(action);
entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- entry->ct_metadata.mark = ct->mark;
+ entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
IP_CT_ESTABLISHED_REPLY;
@@ -345,11 +347,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
module_put(THIS_MODULE);
}
-static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft)
{
- struct tcf_ct_flow_table *ct_ft = params->ct_ft;
-
- if (refcount_dec_and_test(&params->ct_ft->ref)) {
+ if (refcount_dec_and_test(&ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
@@ -657,7 +657,7 @@ struct tc_ct_action_net {
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
- u16 zone_id, bool force)
+ struct tcf_ct_params *p)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -667,11 +667,19 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
return false;
if (!net_eq(net, read_pnet(&ct->ct_net)))
goto drop_ct;
- if (nf_ct_zone(ct)->id != zone_id)
+ if (nf_ct_zone(ct)->id != p->zone)
goto drop_ct;
+ if (p->helper) {
+ struct nf_conn_help *help;
+
+ help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+ if (help && rcu_access_pointer(help->helper) != p->helper)
+ goto drop_ct;
+ }
/* Force conntrack entry direction. */
- if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+ if ((p->ct_action & TCA_CT_ACT_FORCE) &&
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
@@ -832,101 +840,29 @@ out_free:
return err;
}
-static void tcf_ct_params_free(struct rcu_head *head)
+static void tcf_ct_params_free(struct tcf_ct_params *params)
{
- struct tcf_ct_params *params = container_of(head,
- struct tcf_ct_params, rcu);
-
- tcf_ct_flow_table_put(params);
-
+ if (params->helper) {
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (params->ct_action & TCA_CT_ACT_NAT)
+ nf_nat_helper_put(params->helper);
+#endif
+ nf_conntrack_helper_put(params->helper);
+ }
+ if (params->ct_ft)
+ tcf_ct_flow_table_put(params->ct_ft);
if (params->tmpl)
nf_ct_put(params->tmpl);
kfree(params);
}
-#if IS_ENABLED(CONFIG_NF_NAT)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+static void tcf_ct_params_free_rcu(struct rcu_head *head)
{
- __be16 proto = skb_protocol(skb, true);
- int hooknum, err = NF_ACCEPT;
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (proto == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto out;
- } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto out;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- fallthrough;
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto out;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto out;
- }
+ struct tcf_ct_params *params;
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
- if (err == NF_ACCEPT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- tc_skb_cb(skb)->post_ct_snat = 1;
- if (maniptype == NF_NAT_MANIP_DST)
- tc_skb_cb(skb)->post_ct_dnat = 1;
- }
-out:
- return err;
+ params = container_of(head, struct tcf_ct_params, rcu);
+ tcf_ct_params_free(params);
}
-#endif /* CONFIG_NF_NAT */
static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
@@ -936,9 +872,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
if (!mask)
return;
- new_mark = mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
}
@@ -967,69 +903,40 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
- int err;
- enum nf_nat_manip_type maniptype;
+ int err, action = 0;
if (!(ct_action & TCA_CT_ACT_NAT))
return NF_ACCEPT;
+ if (ct_action & TCA_CT_ACT_NAT_SRC)
+ action |= BIT(NF_NAT_MANIP_SRC);
+ if (ct_action & TCA_CT_ACT_NAT_DST)
+ action |= BIT(NF_NAT_MANIP_DST);
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_DROP; /* Can't NAT. */
-
- if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
- (ctinfo != IP_CT_RELATED || commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (ct_action & TCA_CT_ACT_NAT_DST) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT;
- }
+ err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+
+ if (action & BIT(NF_NAT_MANIP_SRC))
+ tc_skb_cb(skb)->post_ct_snat = 1;
+ if (action & BIT(NF_NAT_MANIP_DST))
+ tc_skb_cb(skb)->post_ct_dnat = 1;
- err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ct_nat_execute(skb, ct, ctinfo, range,
- maniptype);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
- }
- }
return err;
#else
return NF_ACCEPT;
#endif
}
-static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
- bool cached, commit, clear, force;
enum ip_conntrack_info ctinfo;
struct tcf_ct *c = to_ct(a);
struct nf_conn *tmpl = NULL;
struct nf_hook_state state;
+ bool cached, commit, clear;
int nh_ofs, err, retval;
struct tcf_ct_params *p;
+ bool add_helper = false;
bool skip_add = false;
bool defrag = false;
struct nf_conn *ct;
@@ -1040,7 +947,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
retval = READ_ONCE(c->tcf_action);
commit = p->ct_action & TCA_CT_ACT_COMMIT;
clear = p->ct_action & TCA_CT_ACT_CLEAR;
- force = p->ct_action & TCA_CT_ACT_FORCE;
tmpl = p->tmpl;
tcf_lastuse_update(&c->tcf_tm);
@@ -1083,7 +989,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
* actually run the packet through conntrack twice unless it's for a
* different zone.
*/
- cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
+ cached = tcf_ct_skb_nfct_cached(net, skb, p);
if (!cached) {
if (tcf_ct_flow_table_lookup(p, skb, family)) {
skip_add = true;
@@ -1116,6 +1022,22 @@ do_nat:
if (err != NF_ACCEPT)
goto drop;
+ if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) {
+ err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC);
+ if (err)
+ goto drop;
+ add_helper = true;
+ if (p->ct_action & TCA_CT_ACT_NAT && !nfct_seqadj(ct)) {
+ if (!nfct_seqadj_ext_add(ct))
+ goto drop;
+ }
+ }
+
+ if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) {
+ if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT)
+ goto drop;
+ }
+
if (commit) {
tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
@@ -1164,6 +1086,9 @@ static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
[TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
+ [TCA_CT_HELPER_NAME] = { .type = NLA_STRING, .len = NF_CT_HELPER_NAME_LEN },
+ [TCA_CT_HELPER_FAMILY] = { .type = NLA_U8 },
+ [TCA_CT_HELPER_PROTO] = { .type = NLA_U8 },
};
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
@@ -1253,8 +1178,9 @@ static int tcf_ct_fill_params(struct net *net,
{
struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
struct nf_conntrack_zone zone;
+ int err, family, proto, len;
struct nf_conn *tmpl;
- int err;
+ char *name;
p->zone = NF_CT_DEFAULT_ZONE_ID;
@@ -1315,10 +1241,31 @@ static int tcf_ct_fill_params(struct net *net,
NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
return -ENOMEM;
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
p->tmpl = tmpl;
+ if (tb[TCA_CT_HELPER_NAME]) {
+ name = nla_data(tb[TCA_CT_HELPER_NAME]);
+ len = nla_len(tb[TCA_CT_HELPER_NAME]);
+ if (len > 16 || name[len - 1] != '\0') {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse helper name.");
+ err = -EINVAL;
+ goto err;
+ }
+ family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET;
+ proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP;
+ err = nf_ct_add_helper(tmpl, name, family, proto,
+ p->ct_action & TCA_CT_ACT_NAT, &p->helper);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to add helper");
+ goto err;
+ }
+ }
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
+err:
+ nf_ct_put(p->tmpl);
+ p->tmpl = NULL;
+ return err;
}
static int tcf_ct_init(struct net *net, struct nlattr *nla,
@@ -1390,7 +1337,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
err = tcf_ct_flow_table_get(net, params);
if (err)
- goto cleanup_params;
+ goto cleanup;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -1401,17 +1348,15 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- call_rcu(&params->rcu, tcf_ct_params_free);
+ call_rcu(&params->rcu, tcf_ct_params_free_rcu);
return res;
-cleanup_params:
- if (params->tmpl)
- nf_ct_put(params->tmpl);
cleanup:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- kfree(params);
+ if (params)
+ tcf_ct_params_free(params);
tcf_idr_release(*a, bind);
return err;
}
@@ -1423,7 +1368,7 @@ static void tcf_ct_cleanup(struct tc_action *a)
params = rcu_dereference_protected(c->params, 1);
if (params)
- call_rcu(&params->rcu, tcf_ct_params_free);
+ call_rcu(&params->rcu, tcf_ct_params_free_rcu);
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
@@ -1489,6 +1434,19 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
return 0;
}
+static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper)
+{
+ if (!helper)
+ return 0;
+
+ if (nla_put_string(skb, TCA_CT_HELPER_NAME, helper->name) ||
+ nla_put_u8(skb, TCA_CT_HELPER_FAMILY, helper->tuple.src.l3num) ||
+ nla_put_u8(skb, TCA_CT_HELPER_PROTO, helper->tuple.dst.protonum))
+ return -1;
+
+ return 0;
+}
+
static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -1541,6 +1499,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
if (tcf_ct_dump_nat(skb, p))
goto nla_put_failure;
+ if (tcf_ct_dump_helper(skb, p->helper))
+ goto nla_put_failure;
+
skip_dump:
if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;