diff options
author | David S. Miller <davem@davemloft.net> | 2021-02-16 17:30:20 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-02-16 17:51:13 -0800 |
commit | d489ded1a3690d7eca8633575cba3f7dac8484c7 (patch) | |
tree | 20e739382965ac61d6314e6b0df4cb6acbbbca0e /net | |
parent | 86dd9868b8788a9063893a97649594af93cd5aa6 (diff) | |
parent | 3af409ca278d4a8d50e91f9f7c4c33b175645cf3 (diff) | |
download | lwn-d489ded1a3690d7eca8633575cba3f7dac8484c7.tar.gz lwn-d489ded1a3690d7eca8633575cba3f7dac8484c7.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Diffstat (limited to 'net')
-rw-r--r-- | net/appletalk/ddp.c | 33 | ||||
-rw-r--r-- | net/caif/chnl_net.c | 5 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 6 | ||||
-rw-r--r-- | net/mptcp/options.c | 10 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 55 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 18 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 83 | ||||
-rw-r--r-- | net/sched/act_api.c | 2 | ||||
-rw-r--r-- | net/sched/cls_api.c | 1 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 39 |
10 files changed, 171 insertions, 81 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ca1a0d07a087..ebda397fa95a 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1577,8 +1577,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; struct ddpehdr *ddp; - int size; - struct atalk_route *rt; + int size, hard_header_len; + struct atalk_route *rt, *rt_lo = NULL; int err; if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) @@ -1641,7 +1641,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", sk, size, dev->name); - size += dev->hard_header_len; + hard_header_len = dev->hard_header_len; + /* Leave room for loopback hardware header if necessary */ + if (usat->sat_addr.s_node == ATADDR_BCAST && + (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) { + struct atalk_addr at_lo; + + at_lo.s_node = 0; + at_lo.s_net = 0; + + rt_lo = atrtr_find(&at_lo); + + if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len) + hard_header_len = rt_lo->dev->hard_header_len; + } + + size += hard_header_len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); lock_sock(sk); @@ -1649,7 +1664,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; skb_reserve(skb, ddp_dl->header_length); - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hard_header_len); skb->dev = dev; SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); @@ -1700,18 +1715,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* loop back */ skb_orphan(skb); if (ddp->deh_dnode == ATADDR_BCAST) { - struct atalk_addr at_lo; - - at_lo.s_node = 0; - at_lo.s_net = 0; - - rt = atrtr_find(&at_lo); - if (!rt) { + if (!rt_lo) { kfree_skb(skb); err = -ENETUNREACH; goto out; } - dev = rt->dev; + dev = rt_lo->dev; skb->dev = dev; } ddp_dl->request(ddp_dl, skb, dev->dev_addr); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 79b6a04d8eb6..fadc7c8a3107 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -115,10 +115,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) else skb->ip_summed = CHECKSUM_NONE; - if (in_interrupt()) - netif_rx(skb); - else - netif_rx_ni(skb); + netif_rx_any_context(skb); /* Update statistics. */ priv->netdev->stats.rx_packets++; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c565c7a17091..2ef2224b3bff 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1056,6 +1056,9 @@ proto_again: key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } + __skb_flow_dissect_ipv4(skb, flow_dissector, + target_container, data, iph); + if (ip_is_fragment(iph)) { key_control->flags |= FLOW_DIS_IS_FRAGMENT; @@ -1072,9 +1075,6 @@ proto_again: } } - __skb_flow_dissect_ipv4(skb, flow_dissector, - target_container, data, iph); - break; } case htons(ETH_P_IPV6): { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bb874c5d663a..b63574d6b812 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -508,8 +508,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); + u64 snd_data_fin_enable, ack_seq; unsigned int dss_size = 0; - u64 snd_data_fin_enable; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; @@ -541,13 +541,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } + ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); + opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); + opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -918,8 +919,7 @@ static void ack_update_msk(struct mptcp_sock *msk, msk->wnd_end = new_wnd_end; /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && - sk_stream_memory_free(ssk)) + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c2a8392254dc..a57f3eab7b6a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -363,8 +363,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk) /* Look for an acknowledged DATA_FIN */ if (mptcp_pending_data_fin_ack(sk)) { - mptcp_stop_timer(sk); - WRITE_ONCE(msk->snd_data_fin_enable, 0); switch (sk->sk_state) { @@ -458,7 +456,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk) static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) { struct sock *ack_hint = READ_ONCE(msk->ack_hint); + int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool cleanup; + + /* this is a simple superset of what tcp_cleanup_rbuf() implements + * so that we don't have to acquire the ssk socket lock most of the time + * to do actually nothing + */ + cleanup = __mptcp_space(sk) - old_space >= max(0, old_space); + if (!cleanup) + return; /* if the hinted ssk is still active, try to use it */ if (likely(ack_hint)) { @@ -1565,6 +1574,9 @@ out: mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + if (msk->snd_data_fin_enable && msk->snd_nxt + 1 == msk->write_seq) mptcp_schedule_work(sk); @@ -1868,7 +1880,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk) skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); } -static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) +static bool __mptcp_move_skbs(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0; @@ -1888,13 +1900,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) slowpath = lock_sock_fast(ssk); mptcp_data_lock(sk); + __mptcp_update_rmem(sk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); - if (moved && rcv) { - WRITE_ONCE(msk->rmem_pending, min(rcv, moved)); - tcp_cleanup_rbuf(ssk, 1); - WRITE_ONCE(msk->rmem_pending, 0); - } + tcp_cleanup_rbuf(ssk, moved); unlock_sock_fast(ssk, slowpath); } while (!done); @@ -1907,6 +1916,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) ret |= __mptcp_ofo_queue(msk); __mptcp_splice_receive_queue(sk); mptcp_data_unlock(sk); + mptcp_cleanup_rbuf(msk); } if (ret) mptcp_check_data_fin((struct sock *)msk); @@ -1936,7 +1946,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); while (copied < len) { - int bytes_read, old_space; + int bytes_read; bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); if (unlikely(bytes_read < 0)) { @@ -1947,14 +1957,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - if (skb_queue_empty(&msk->receive_queue) && - __mptcp_move_skbs(msk, len - copied)) - continue; - /* be sure to advertise window change */ - old_space = READ_ONCE(msk->old_wspace); - if ((tcp_space(sk) - old_space) >= old_space) - mptcp_cleanup_rbuf(msk); + mptcp_cleanup_rbuf(msk); + + if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) + continue; /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() @@ -1982,7 +1989,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* race breaker: the shutdown could be after the * previous receive queue check */ - if (__mptcp_move_skbs(msk, len - copied)) + if (__mptcp_move_skbs(msk)) continue; break; } @@ -2015,7 +2022,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* .. race-breaker: ssk might have gotten new data * after last __mptcp_move_skbs() returned false. */ - if (unlikely(__mptcp_move_skbs(msk, 0))) + if (unlikely(__mptcp_move_skbs(msk))) set_bit(MPTCP_DATA_READY, &msk->flags); } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { /* data to read but mptcp_wait_data() cleared DATA_READY */ @@ -2275,6 +2282,7 @@ static void mptcp_worker(struct work_struct *work) if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; + __mptcp_clean_una(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) goto unlock; @@ -2943,6 +2951,8 @@ static void mptcp_release_cb(struct sock *sk) mptcp_push_pending(sk, 0); spin_lock_bh(&sk->sk_lock.slock); } + if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) + __mptcp_error_report(sk); /* clear any wmem reservation and errors */ __mptcp_update_wmem(sk); @@ -3319,7 +3329,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) struct sock *sk = (struct sock *)msk; if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return 0; + return EPOLLOUT | EPOLLWRNORM; if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; @@ -3352,9 +3362,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, mask |= mptcp_check_readable(msk); mask |= mptcp_check_writeable(msk); } + if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= EPOLLERR; + return mask; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1b6ec1773678..91827d949766 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -102,6 +102,7 @@ #define MPTCP_WORK_CLOSE_SUBFLOW 5 #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 +#define MPTCP_ERROR_REPORT 8 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -237,7 +238,6 @@ struct mptcp_sock { u64 wnd_end; unsigned long timer_ival; u32 token; - int rmem_pending; int rmem_released; unsigned long flags; bool can_ack; @@ -301,7 +301,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) static inline int __mptcp_space(const struct sock *sk) { - return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending); + return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) @@ -334,20 +334,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una))) + if (msk->snd_una == READ_ONCE(msk->snd_nxt)) return NULL; - return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); -} - -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } @@ -436,6 +429,7 @@ struct mptcp_subflow_context { void (*tcp_data_ready)(struct sock *sk); void (*tcp_state_change)(struct sock *sk); void (*tcp_write_space)(struct sock *sk); + void (*tcp_error_report)(struct sock *sk); struct rcu_head rcu; }; @@ -560,6 +554,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, sk->sk_data_ready = ctx->tcp_data_ready; sk->sk_state_change = ctx->tcp_state_change; sk->sk_write_space = ctx->tcp_write_space; + sk->sk_error_report = ctx->tcp_error_report; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } @@ -587,6 +582,7 @@ bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); +void __mptcp_error_report(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __mptcp_flush_join_list(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ce2dea2a6e0a..06e233410e0e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,7 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return msk; } -static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener) +static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -108,16 +108,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li subflow_req->mp_join = 0; subflow_req->msk = NULL; mptcp_token_init_request(req); - -#ifdef CONFIG_TCP_MD5SIG - /* no MPTCP if MD5SIG is enabled on this socket or we may run out of - * TCP option space. - */ - if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) - return -EINVAL; -#endif - - return 0; } static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk) @@ -130,20 +120,23 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc * Returns an error code if a JOIN has failed and a TCP reset * should be sent. */ -static int subflow_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static int subflow_check_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; - int ret; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - ret = __subflow_init_req(req, sk_listener); - if (ret) - return 0; +#ifdef CONFIG_TCP_MD5SIG + /* no MPTCP if MD5SIG is enabled on this socket or we may run out of + * TCP option space. + */ + if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) + return -EINVAL; +#endif mptcp_get_options(skb, &mp_opt); @@ -236,10 +229,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, struct mptcp_options_received mp_opt; int err; - err = __subflow_init_req(req, sk_listener); - if (err) - return err; - + subflow_init_req(req, sk_listener); mptcp_get_options(skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) @@ -279,12 +269,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, int err; tcp_rsk(req)->is_mptcp = 1; + subflow_init_req(req, sk); dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req); if (!dst) return NULL; - err = subflow_init_req(req, sk, skb); + err = subflow_check_req(req, sk, skb); if (err == 0) return dst; @@ -304,12 +295,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, int err; tcp_rsk(req)->is_mptcp = 1; + subflow_init_req(req, sk); dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req); if (!dst) return NULL; - err = subflow_init_req(req, sk, skb); + err = subflow_check_req(req, sk, skb); if (err == 0) return dst; @@ -1124,6 +1116,46 @@ static void subflow_write_space(struct sock *ssk) mptcp_write_space(sk); } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + inet_sk_state_store(sk, inet_sk_state_load(ssk)); + sk->sk_err = -err; + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + break; + } +} + +static void subflow_error_report(struct sock *ssk) +{ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); +} + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@ -1470,9 +1502,11 @@ static int subflow_ulp_init(struct sock *sk) ctx->tcp_data_ready = sk->sk_data_ready; ctx->tcp_state_change = sk->sk_state_change; ctx->tcp_write_space = sk->sk_write_space; + ctx->tcp_error_report = sk->sk_error_report; sk->sk_data_ready = subflow_data_ready; sk->sk_write_space = subflow_write_space; sk->sk_state_change = subflow_state_change; + sk->sk_error_report = subflow_error_report; out: return err; } @@ -1526,6 +1560,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_write_space = old_ctx->tcp_write_space; + new_ctx->tcp_error_report = old_ctx->tcp_error_report; new_ctx->rel_write_seq = 1; new_ctx->tcp_sock = newsk; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4dd235ce9a07..b919826939e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -static void tcf_idr_insert_many(struct tc_action *actions[]) +void tcf_idr_insert_many(struct tc_action *actions[]) { int i; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a67c66a512a4..e37556cc37ab 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3060,6 +3060,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; + tcf_idr_insert_many(exts->actions); } else if (exts->action && tb[exts->action]) { int err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index caf7643e9c83..2409e522c68f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -30,6 +30,11 @@ #include <uapi/linux/netfilter/nf_conntrack_common.h> +#define TCA_FLOWER_KEY_CT_FLAGS_MAX \ + ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1) +#define TCA_FLOWER_KEY_CT_FLAGS_MASK \ + (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -690,8 +695,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, - [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_CT_STATE] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), + [TCA_FLOWER_KEY_CT_STATE_MASK] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), [TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 }, @@ -1394,12 +1401,33 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return 0; } +static int fl_validate_ct_state(u16 state, struct nlattr *tb, + struct netlink_ext_ack *extack) +{ + if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "no trk, so no other flag can be set"); + return -EINVAL; + } + + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && + state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "new and est are mutually exclusive"); + return -EINVAL; + } + + return 0; +} + static int fl_set_key_ct(struct nlattr **tb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask, struct netlink_ext_ack *extack) { if (tb[TCA_FLOWER_KEY_CT_STATE]) { + int err; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) { NL_SET_ERR_MSG(extack, "Conntrack isn't enabled"); return -EOPNOTSUPP; @@ -1407,6 +1435,13 @@ static int fl_set_key_ct(struct nlattr **tb, fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE, &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state)); + + err = fl_validate_ct_state(mask->ct_state, + tb[TCA_FLOWER_KEY_CT_STATE_MASK], + extack); + if (err) + return err; + } if (tb[TCA_FLOWER_KEY_CT_ZONE]) { if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { |