diff options
Diffstat (limited to 'net/sched/act_ct.c')
-rw-r--r-- | net/sched/act_ct.c | 265 |
1 files changed, 113 insertions, 152 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b38d91d6b249..0ca2bb8ed026 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -24,6 +24,7 @@ #include <net/ipv6_frag.h> #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_wrapper.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> @@ -33,6 +34,7 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_conntrack_act_ct.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <uapi/linux/netfilter/nf_nat.h> static struct workqueue_struct *act_ct_wq; @@ -178,7 +180,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -345,11 +347,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) module_put(THIS_MODULE); } -static void tcf_ct_flow_table_put(struct tcf_ct_params *params) +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft) { - struct tcf_ct_flow_table *ct_ft = params->ct_ft; - - if (refcount_dec_and_test(¶ms->ct_ft->ref)) { + if (refcount_dec_and_test(&ct_ft->ref)) { rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); queue_rcu_work(act_ct_wq, &ct_ft->rwork); @@ -657,7 +657,7 @@ struct tc_ct_action_net { /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, - u16 zone_id, bool force) + struct tcf_ct_params *p) { enum ip_conntrack_info ctinfo; struct nf_conn *ct; @@ -667,11 +667,19 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, return false; if (!net_eq(net, read_pnet(&ct->ct_net))) goto drop_ct; - if (nf_ct_zone(ct)->id != zone_id) + if (nf_ct_zone(ct)->id != p->zone) goto drop_ct; + if (p->helper) { + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + if (help && rcu_access_pointer(help->helper) != p->helper) + goto drop_ct; + } /* Force conntrack entry direction. */ - if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { + if ((p->ct_action & TCA_CT_ACT_FORCE) && + CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { if (nf_ct_is_confirmed(ct)) nf_ct_kill(ct); @@ -832,101 +840,29 @@ out_free: return err; } -static void tcf_ct_params_free(struct rcu_head *head) +static void tcf_ct_params_free(struct tcf_ct_params *params) { - struct tcf_ct_params *params = container_of(head, - struct tcf_ct_params, rcu); - - tcf_ct_flow_table_put(params); - + if (params->helper) { +#if IS_ENABLED(CONFIG_NF_NAT) + if (params->ct_action & TCA_CT_ACT_NAT) + nf_nat_helper_put(params->helper); +#endif + nf_conntrack_helper_put(params->helper); + } + if (params->ct_ft) + tcf_ct_flow_table_put(params->ct_ft); if (params->tmpl) nf_ct_put(params->tmpl); kfree(params); } -#if IS_ENABLED(CONFIG_NF_NAT) -/* Modelled after nf_nat_ipv[46]_fn(). - * range is only used for new, uninitialized NAT state. - * Returns either NF_ACCEPT or NF_DROP. - */ -static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) +static void tcf_ct_params_free_rcu(struct rcu_head *head) { - __be16 proto = skb_protocol(skb, true); - int hooknum, err = NF_ACCEPT; - - /* See HOOK2MANIP(). */ - if (maniptype == NF_NAT_MANIP_SRC) - hooknum = NF_INET_LOCAL_IN; /* Source NAT */ - else - hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - if (proto == htons(ETH_P_IP) && - ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) - err = NF_DROP; - goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { - __be16 frag_off; - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - int hdrlen = ipv6_skip_exthdr(skb, - sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, - ctinfo, - hooknum, - hdrlen)) - err = NF_DROP; - goto out; - } - } - /* Non-ICMP, fall thru to initialize if needed. */ - fallthrough; - case IP_CT_NEW: - /* Seen it before? This can happen for loopback, retrans, - * or local packets. - */ - if (!nf_nat_initialized(ct, maniptype)) { - /* Initialize according to the NAT action. */ - err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) - /* Action is set up to establish a new - * mapping. - */ - ? nf_nat_setup_info(ct, range, maniptype) - : nf_nat_alloc_null_binding(ct, hooknum); - if (err != NF_ACCEPT) - goto out; - } - break; - - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED_REPLY: - break; - - default: - err = NF_DROP; - goto out; - } + struct tcf_ct_params *params; - err = nf_nat_packet(ct, ctinfo, hooknum, skb); - if (err == NF_ACCEPT) { - if (maniptype == NF_NAT_MANIP_SRC) - tc_skb_cb(skb)->post_ct_snat = 1; - if (maniptype == NF_NAT_MANIP_DST) - tc_skb_cb(skb)->post_ct_dnat = 1; - } -out: - return err; + params = container_of(head, struct tcf_ct_params, rcu); + tcf_ct_params_free(params); } -#endif /* CONFIG_NF_NAT */ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) { @@ -936,9 +872,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } @@ -967,69 +903,40 @@ static int tcf_ct_act_nat(struct sk_buff *skb, bool commit) { #if IS_ENABLED(CONFIG_NF_NAT) - int err; - enum nf_nat_manip_type maniptype; + int err, action = 0; if (!(ct_action & TCA_CT_ACT_NAT)) return NF_ACCEPT; + if (ct_action & TCA_CT_ACT_NAT_SRC) + action |= BIT(NF_NAT_MANIP_SRC); + if (ct_action & TCA_CT_ACT_NAT_DST) + action |= BIT(NF_NAT_MANIP_DST); - /* Add NAT extension if not confirmed yet. */ - if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) - return NF_DROP; /* Can't NAT. */ - - if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && - (ctinfo != IP_CT_RELATED || commit)) { - /* NAT an established or related connection like before. */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) - /* This is the REPLY direction for a connection - * for which NAT was applied in the forward - * direction. Do the reverse NAT. - */ - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; - else - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; - } else if (ct_action & TCA_CT_ACT_NAT_SRC) { - maniptype = NF_NAT_MANIP_SRC; - } else if (ct_action & TCA_CT_ACT_NAT_DST) { - maniptype = NF_NAT_MANIP_DST; - } else { - return NF_ACCEPT; - } + err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit); + + if (action & BIT(NF_NAT_MANIP_SRC)) + tc_skb_cb(skb)->post_ct_snat = 1; + if (action & BIT(NF_NAT_MANIP_DST)) + tc_skb_cb(skb)->post_ct_dnat = 1; - err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); - if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { - if (ct->status & IPS_SRC_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ct_nat_execute(skb, ct, ctinfo, range, - maniptype); - } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { - err = ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); - } - } return err; #else return NF_ACCEPT; #endif } -static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct net *net = dev_net(skb->dev); - bool cached, commit, clear, force; enum ip_conntrack_info ctinfo; struct tcf_ct *c = to_ct(a); struct nf_conn *tmpl = NULL; struct nf_hook_state state; + bool cached, commit, clear; int nh_ofs, err, retval; struct tcf_ct_params *p; + bool add_helper = false; bool skip_add = false; bool defrag = false; struct nf_conn *ct; @@ -1040,7 +947,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, retval = READ_ONCE(c->tcf_action); commit = p->ct_action & TCA_CT_ACT_COMMIT; clear = p->ct_action & TCA_CT_ACT_CLEAR; - force = p->ct_action & TCA_CT_ACT_FORCE; tmpl = p->tmpl; tcf_lastuse_update(&c->tcf_tm); @@ -1083,7 +989,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, * actually run the packet through conntrack twice unless it's for a * different zone. */ - cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); + cached = tcf_ct_skb_nfct_cached(net, skb, p); if (!cached) { if (tcf_ct_flow_table_lookup(p, skb, family)) { skip_add = true; @@ -1116,6 +1022,22 @@ do_nat: if (err != NF_ACCEPT) goto drop; + if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) { + err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC); + if (err) + goto drop; + add_helper = true; + if (p->ct_action & TCA_CT_ACT_NAT && !nfct_seqadj(ct)) { + if (!nfct_seqadj_ext_add(ct)) + goto drop; + } + } + + if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) { + if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT) + goto drop; + } + if (commit) { tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); @@ -1164,6 +1086,9 @@ static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { [TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, + [TCA_CT_HELPER_NAME] = { .type = NLA_STRING, .len = NF_CT_HELPER_NAME_LEN }, + [TCA_CT_HELPER_FAMILY] = { .type = NLA_U8 }, + [TCA_CT_HELPER_PROTO] = { .type = NLA_U8 }, }; static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, @@ -1253,8 +1178,9 @@ static int tcf_ct_fill_params(struct net *net, { struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; + int err, family, proto, len; struct nf_conn *tmpl; - int err; + char *name; p->zone = NF_CT_DEFAULT_ZONE_ID; @@ -1315,10 +1241,31 @@ static int tcf_ct_fill_params(struct net *net, NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); return -ENOMEM; } - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); p->tmpl = tmpl; + if (tb[TCA_CT_HELPER_NAME]) { + name = nla_data(tb[TCA_CT_HELPER_NAME]); + len = nla_len(tb[TCA_CT_HELPER_NAME]); + if (len > 16 || name[len - 1] != '\0') { + NL_SET_ERR_MSG_MOD(extack, "Failed to parse helper name."); + err = -EINVAL; + goto err; + } + family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET; + proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP; + err = nf_ct_add_helper(tmpl, name, family, proto, + p->ct_action & TCA_CT_ACT_NAT, &p->helper); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to add helper"); + goto err; + } + } + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; +err: + nf_ct_put(p->tmpl); + p->tmpl = NULL; + return err; } static int tcf_ct_init(struct net *net, struct nlattr *nla, @@ -1390,7 +1337,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, err = tcf_ct_flow_table_get(net, params); if (err) - goto cleanup_params; + goto cleanup; spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -1401,17 +1348,15 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); if (params) - call_rcu(¶ms->rcu, tcf_ct_params_free); + call_rcu(¶ms->rcu, tcf_ct_params_free_rcu); return res; -cleanup_params: - if (params->tmpl) - nf_ct_put(params->tmpl); cleanup: if (goto_ch) tcf_chain_put_by_act(goto_ch); - kfree(params); + if (params) + tcf_ct_params_free(params); tcf_idr_release(*a, bind); return err; } @@ -1423,7 +1368,7 @@ static void tcf_ct_cleanup(struct tc_action *a) params = rcu_dereference_protected(c->params, 1); if (params) - call_rcu(¶ms->rcu, tcf_ct_params_free); + call_rcu(¶ms->rcu, tcf_ct_params_free_rcu); } static int tcf_ct_dump_key_val(struct sk_buff *skb, @@ -1489,6 +1434,19 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) return 0; } +static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper) +{ + if (!helper) + return 0; + + if (nla_put_string(skb, TCA_CT_HELPER_NAME, helper->name) || + nla_put_u8(skb, TCA_CT_HELPER_FAMILY, helper->tuple.src.l3num) || + nla_put_u8(skb, TCA_CT_HELPER_PROTO, helper->tuple.dst.protonum)) + return -1; + + return 0; +} + static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -1541,6 +1499,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, if (tcf_ct_dump_nat(skb, p)) goto nla_put_failure; + if (tcf_ct_dump_helper(skb, p->helper)) + goto nla_put_failure; + skip_dump: if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; |