From 029f7f3b8701cc7aca8bdb31f0c7edd6a479e357 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:39 +0100 Subject: netfilter: ipv6: nf_defrag: avoid/free clone operations commit 6aafeef03b9d9ecf ("netfilter: push reasm skb through instead of original frag skbs") changed ipv6 defrag to not use the original skbs anymore. So rather than keeping the original skbs around just to discard them afterwards just use the original skbs directly for the fraglist of the newly assembled skb and remove the extra clone/free operations. The skb that completes the fragment queue is morphed into a the reassembled one instead, just like ipv4 defrag. openvswitch doesn't need any additional skb_morph magic anymore to deal with this situation so just remove that. A followup patch can then also remove the NF_HOOK (re)invocation in the ipv6 netfilter defrag hook. Cc: Joe Stringer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fb7da5bb76cc..fcd20cf8f5d5 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,7 +6,6 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); -void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; -- cgit v1.2.3 From daaa7d647f81f3f1494d9a9029d611b666d63181 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:40 +0100 Subject: netfilter: ipv6: avoid nf_iterate recursion The previous patch changed nf_ct_frag6_gather() to morph reassembled skb with the previous one. This means that the return value is always NULL or the skb argument. So change it to an err value. Instead of invoking NF_HOOK recursively with threshold to skip already-called hooks we can now just return NF_ACCEPT to move on to the next hook except for -EINPROGRESS (which means skb has been queued for reassembly), in which case we return NF_STOLEN. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 71 +++++++++++++---------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 14 ++---- net/openvswitch/conntrack.c | 11 ++--- 4 files changed, 42 insertions(+), 56 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fcd20cf8f5d5..ddf162f7966f 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -5,7 +5,7 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); struct inet_frags_ctl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a86a08adbe5..912bc3afc183 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -361,14 +361,15 @@ err: /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. + * + * returns true if *prev skb has been transformed into the reassembled + * skb, false otherwise. */ -static struct sk_buff * +static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; @@ -382,22 +383,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - goto out_fail; + return false; /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - pr_debug("payload len is too large.\n"); - goto out_oversize; + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); + return false; } /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) { - pr_debug("skb is cloned but can't expand head"); - goto out_oom; - } + if (skb_unclone(head, GFP_ATOMIC)) + return false; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -408,7 +408,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic clone = alloc_skb(0, GFP_ATOMIC); if (clone == NULL) - goto out_oom; + return false; clone->next = head->next; head->next = clone; @@ -438,7 +438,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fp = skb_clone(prev, GFP_ATOMIC); if (!fp) - goto out_oom; + return false; fp->next = prev->next; skb_queue_walk(head, iter) { @@ -494,16 +494,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - return head; - -out_oversize: - net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", - payload_len); - goto out_fail; -out_oom: - net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); -out_fail: - return NULL; + return true; } /* @@ -569,27 +560,26 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev; + int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int fhoff, nhoff; u8 prevhdr; - struct sk_buff *ret_skb = NULL; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return skb; + return -EINVAL; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return skb; + return -EINVAL; if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) - return skb; + return -ENOMEM; skb_set_transport_header(skb, fhoff); hdr = ipv6_hdr(skb); @@ -598,27 +588,28 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) - return skb; + return -ENOMEM; spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - spin_unlock_bh(&fq->q.lock); - pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); - return skb; + ret = -EINVAL; + goto out_unlock; } + /* after queue has assumed skb ownership, only 0 or -EINPROGRESS + * must be returned. + */ + ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, skb, dev); - if (ret_skb == NULL) - pr_debug("Can't reassemble fragmented packets\n"); - } - spin_unlock_bh(&fq->q.lock); + fq->q.meat == fq->q.len && + nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; +out_unlock: + spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q, &nf_frags); - return ret_skb; + return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index fb96b1018884..f7aab5ab93a5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct sk_buff *reasm; + int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ @@ -63,17 +63,13 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(state->net, skb, - nf_ct6_defrag_user(state->hook, skb)); + err = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ - if (reasm == NULL) + if (err == -EINPROGRESS) return NF_STOLEN; - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, - state->in, state->out, - state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - - return NF_STOLEN; + return NF_ACCEPT; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cac2169f2909..0c68c8e46d0b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -300,10 +300,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + int err; if (key->eth.type == htons(ETH_P_IP)) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); err = ip_defrag(net, skb, user); @@ -314,14 +314,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(net, skb, user); - if (!reasm) - return -EINPROGRESS; + err = nf_ct_frag6_gather(net, skb, user); + if (err) + return err; - key->ip.proto = ipv6_hdr(reasm)->nexthdr; + key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.2.3 From a9ecfbe7fcf2f600718c3f995cbb46043f7a7a5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Nov 2015 00:03:28 +0100 Subject: netfilter: nf_tables: remove unused struct members Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4bd7508bedc9..101d7d7ec243 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,8 +19,6 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; - u8 nhoff; - u8 thoff; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; -- cgit v1.2.3 From 7ec3f7b47b8d9ad7ba425726f2c58f9ddce040df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Nov 2015 10:00:22 +0000 Subject: netfilter: nft_payload: add packet mangling support Add support for mangling packet payload. Checksum for the specified base header is updated automatically if requested, however no updates for any kind of pseudo headers are supported, meaning no stateless NAT is supported. For checksum updates different checksumming methods can be specified. The currently supported methods are NONE for no checksum updates, and INET for internet type checksums. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 +++ include/uapi/linux/netfilter/nf_tables.h | 17 ++++ net/netfilter/nft_payload.c | 135 +++++++++++++++++++++++++++++-- 3 files changed, 155 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index c6f400cfaac8..4ff5424909aa 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -47,6 +47,15 @@ struct nft_payload { enum nft_registers dreg:8; }; +struct nft_payload_set { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers sreg:8; + u8 csum_type; + u8 csum_offset; +}; + extern const struct nft_expr_ops nft_payload_fast_ops; int nft_payload_module_init(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d8c8a7c9d88a..5f3ececf84b3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -597,6 +597,17 @@ enum nft_payload_bases { NFT_PAYLOAD_TRANSPORT_HEADER, }; +/** + * enum nft_payload_csum_types - nf_tables payload expression checksum types + * + * @NFT_PAYLOAD_CSUM_NONE: no checksumming + * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + */ +enum nft_payload_csum_types { + NFT_PAYLOAD_CSUM_NONE, + NFT_PAYLOAD_CSUM_INET, +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -604,6 +615,9 @@ enum nft_payload_bases { * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) + * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -611,6 +625,9 @@ enum nft_payload_attributes { NFTA_PAYLOAD_BASE, NFTA_PAYLOAD_OFFSET, NFTA_PAYLOAD_LEN, + NFTA_PAYLOAD_SREG, + NFTA_PAYLOAD_CSUM_TYPE, + NFTA_PAYLOAD_CSUM_OFFSET, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07eb676..12cd4bf16d17 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.2.3 From 33d5a7b14bfd02e60af9d223db8dfff0cbcabe6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:04 +0100 Subject: netfilter: nf_tables: extend tracing infrastructure nft monitor mode can then decode and display this trace data. Parts of LL/Network/Transport headers are provided as separate attributes. Otherwise, printing IP address data becomes virtually impossible for userspace since in the case of the netdev family we really don't want userspace to have to know all the possible link layer types and/or sizes just to display/print an ip address. We also don't want userspace to have to follow ipv6 header chains to get the s/dport info, the kernel already did this work for us. To avoid bloating nft_do_chain all data required for tracing is encapsulated in nft_traceinfo. The structure is initialized unconditionally(!) for each nft_do_chain invocation. This unconditionall call will be moved under a static key in a followup patch. With lots of help from Patrick McHardy and Pablo Neira. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 32 ++++ include/uapi/linux/netfilter/nf_tables.h | 52 ++++++ include/uapi/linux/netfilter/nfnetlink.h | 2 + net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 12 +- net/netfilter/nf_tables_core.c | 45 +++-- net/netfilter/nf_tables_trace.c | 271 +++++++++++++++++++++++++++++++ net/netfilter/nfnetlink.c | 1 + 8 files changed, 398 insertions(+), 19 deletions(-) create mode 100644 net/netfilter/nf_tables_trace.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 101d7d7ec243..b313cda49194 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -888,6 +888,38 @@ void nft_unregister_chain_type(const struct nf_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); +int nft_verdict_dump(struct sk_buff *skb, int type, + const struct nft_verdict *v); + +/** + * struct nft_traceinfo - nft tracing information and state + * + * @pkt: pktinfo currently processed + * @basechain: base chain currently processed + * @chain: chain currently processed + * @rule: rule that was evaluated + * @verdict: verdict given by rule + * @type: event type (enum nft_trace_types) + * @packet_dumped: packet headers sent in a previous traceinfo message + * @trace: other struct members are initialised + */ +struct nft_traceinfo { + const struct nft_pktinfo *pkt; + const struct nft_base_chain *basechain; + const struct nft_chain *chain; + const struct nft_rule *rule; + const struct nft_verdict *verdict; + enum nft_trace_types type; + bool packet_dumped; + bool trace; +}; + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *basechain); + +void nft_trace_notify(struct nft_traceinfo *info); + #define nft_dereference(p) \ nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5f3ececf84b3..b48a3ab761f8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -83,6 +83,7 @@ enum nft_verdicts { * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -102,6 +103,7 @@ enum nf_tables_msg_types { NFT_MSG_DELSETELEM, NFT_MSG_NEWGEN, NFT_MSG_GETGEN, + NFT_MSG_TRACE, NFT_MSG_MAX, }; @@ -987,4 +989,54 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) + * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING) + * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types) + * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts) + * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32) + * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY) + * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY) + * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY) + * @NFTA_TRACE_IIF: indev ifindex (NLA_U32) + * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16) + * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32) + * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16) + * @NFTA_TRACE_MARK: nfmark (NLA_U32) + * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) + * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + */ +enum nft_trace_attibutes { + NFTA_TRACE_UNSPEC, + NFTA_TRACE_TABLE, + NFTA_TRACE_CHAIN, + NFTA_TRACE_RULE_HANDLE, + NFTA_TRACE_TYPE, + NFTA_TRACE_VERDICT, + NFTA_TRACE_ID, + NFTA_TRACE_LL_HEADER, + NFTA_TRACE_NETWORK_HEADER, + NFTA_TRACE_TRANSPORT_HEADER, + NFTA_TRACE_IIF, + NFTA_TRACE_IIFTYPE, + NFTA_TRACE_OIF, + NFTA_TRACE_OIFTYPE, + NFTA_TRACE_MARK, + NFTA_TRACE_NFPROTO, + NFTA_TRACE_POLICY, + __NFTA_TRACE_MAX +}; +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) + +enum nft_trace_types { + NFT_TRACETYPE_UNSPEC, + NFT_TRACETYPE_POLICY, + NFT_TRACETYPE_RETURN, + NFT_TRACETYPE_RULE, + __NFT_TRACETYPE_MAX +}; +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) #endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 354a7e5e50f2..4bb8cb7730e7 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -22,6 +22,8 @@ enum nfnetlink_groups { #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES NFNLGRP_ACCT_QUOTA, #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA + NFNLGRP_NFTRACE, +#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36b498c..22934846b5d1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc4737018f..c4969a0d54ba 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4446,22 +4446,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4572,7 +4572,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a497408..2395de7c8ab2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,22 +44,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (unlikely(info->trace)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +135,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +167,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +178,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +192,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +201,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +216,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 000000000000..36fd7ad6729a --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,271 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. + */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 46453ab318db..28591fa94ba5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -49,6 +49,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) -- cgit v1.2.3 From e639f7ab079b5256660018511d87aa34b54f1a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:05 +0100 Subject: netfilter: nf_tables: wrap tracing with a static key Only needed when meta nftrace rule(s) were added. The assumption is that no such rules are active, so the call to nft_trace_init is "never" needed. When nftrace rules are active, we always call the nft_trace_* functions, but will only send netlink messages when all of the following are true: - traceinfo structure was initialised - skb->nf_trace == 1 - at least one subscriber to trace group. Adding an extra conditional (static_branch ... && skb->nf_trace) nft_trace_init( ..) Is possible but results in a larger nft_do_chain footprint. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 1 + include/net/netfilter/nft_meta.h | 3 +++ net/bridge/netfilter/nft_meta_bridge.c | 1 + net/netfilter/nf_tables_core.c | 9 ++++++--- net/netfilter/nf_tables_trace.c | 4 ++++ net/netfilter/nft_meta.c | 16 ++++++++++++++++ 6 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 4ff5424909aa..a9060dd99db7 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -57,6 +57,7 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; +extern struct static_key_false nft_trace_enabled; int nft_payload_module_init(void); void nft_payload_module_exit(void); diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 711887a09e91..d27588c8dbd9 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -33,4 +33,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr); + #endif diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a21269b83f16..4b901d9f2e7c 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2395de7c8ab2..67fa41d317f6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, { const struct nft_pktinfo *pkt = info->pkt; - if (!pkt->skb->nf_trace) + if (!info->trace || !pkt->skb->nf_trace) return; info->chain = chain; @@ -70,7 +71,7 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, int rulenum, enum nft_trace_types type) { - if (unlikely(info->trace)) { + if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; __nft_trace_packet(info, chain, rulenum, type); } @@ -137,7 +138,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; - nft_trace_init(&info, pkt, ®s.verdict, basechain); + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 36fd7ad6729a..e9e959f65d91 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,9 @@ #define NFT_TRACETYPE_NETWORK_HSIZE 40 #define NFT_TRACETYPE_TRANSPORT_HSIZE 20 +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) { __be32 id; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..85a465b773e5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include /* for TCP_TIME_WAIT */ #include +#include #include void nft_meta_get_eval(const struct nft_expr *expr, @@ -297,6 +299,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); + return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +339,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +363,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; -- cgit v1.2.3 From 19576c9478682a398276c994ea0d2696474df32b Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 9 Dec 2015 14:07:40 +0100 Subject: netfilter: cttimeout: add netns support Add a per-netns list of timeout objects and adjust code to use it. Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 3 + include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_timeout.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 82 +++++++++++++++++----------- net/netfilter/xt_CT.c | 2 +- 5 files changed, 57 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2dcea635ecce..4089abc6e9c0 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -121,6 +121,9 @@ struct net { #if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) struct list_head nfnl_acct_list; #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + struct list_head nfct_timeout_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index f72be38860a7..5cc5e9e6171a 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -104,7 +104,7 @@ static inline void nf_conntrack_timeout_fini(void) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(const char *name); +extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); #endif diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609d9d29..26e742006c48 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e1c462..3921d544f5ba 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. - */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e53b59..6669e68d589e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); -- cgit v1.2.3