From d92191aa84e5f187d543867c3d54b38f294833fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 21 Mar 2018 13:55:42 +0100
Subject: netfilter: nf_tables: cache device name in flowtable object

Devices going away have to grab the nfnl_lock from the netdev event path
to avoid races with control plane updates.

However, netlink dumps in netfilter do not hold nfnl_lock mutex. Cache
the device name into the objects to avoid an use-after-free situation
for a device that is going away.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015dace5..30eb0652b025 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1068,6 +1068,8 @@ struct nft_object_ops {
 int nft_register_obj(struct nft_object_type *obj_type);
 void nft_unregister_obj(struct nft_object_type *obj_type);
 
+#define NFT_FLOWTABLE_DEVICE_MAX	8
+
 /**
  *	struct nft_flowtable - nf_tables flow table
  *
@@ -1080,6 +1082,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  *	@genmask: generation mask
  *	@use: number of references to this flow table
  * 	@handle: unique object handle
+ *	@dev_name: array of device names
  *	@data: rhashtable and garbage collector
  * 	@ops: array of hooks
  */
@@ -1093,6 +1096,7 @@ struct nft_flowtable {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
+	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
 	/* runtime data below here */
 	struct nf_hook_ops		*ops ____cacheline_aligned;
 	struct nf_flowtable		data;
-- 
cgit v1.2.3


From eb82a994479245a79647d302f9b4eb8e7c9d7ca6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 24 Mar 2018 22:25:06 -0700
Subject: net: sched, fix OOO packets with pfifo_fast

After the qdisc lock was dropped in pfifo_fast we allow multiple
enqueue threads and dequeue threads to run in parallel. On the
enqueue side the skb bit ooo_okay is used to ensure all related
skbs are enqueued in-order. On the dequeue side though there is
no similar logic. What we observe is with fewer queues than CPUs
it is possible to re-order packets when two instances of
__qdisc_run() are running in parallel. Each thread will dequeue
a skb and then whichever thread calls the ndo op first will
be sent on the wire. This doesn't typically happen because
qdisc_run() is usually triggered by the same core that did the
enqueue. However, drivers will trigger __netif_schedule()
when queues are transitioning from stopped to awake using the
netif_tx_wake_* APIs. When this happens netif_schedule() calls
qdisc_run() on the same CPU that did the netif_tx_wake_* which
is usually done in the interrupt completion context. This CPU
is selected with the irq affinity which is unrelated to the
enqueue operations.

To resolve this we add a RUNNING bit to the qdisc to ensure
only a single dequeue per qdisc is running. Enqueue and dequeue
operations can still run in parallel and also on multi queue
NICs we can still have a dequeue in-flight per qdisc, which
is typically per CPU.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/sch_generic.c   | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33194dd..8da32678ce18 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_RUNNING,
 };
 
 struct qdisc_size_table {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7e3fbe9cc936..39c144b6ff98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  */
 static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 {
+	bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
 	spinlock_t *root_lock = NULL;
 	struct netdev_queue *txq;
 	struct net_device *dev;
 	struct sk_buff *skb;
-	bool validate;
 
 	/* Dequeue packet */
+	if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+		return false;
+
 	skb = dequeue_skb(q, &validate, packets);
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		if (nolock)
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
 		return false;
+	}
 
-	if (!(q->flags & TCQ_F_NOLOCK))
+	if (!nolock)
 		root_lock = qdisc_lock(q);
 
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	if (nolock)
+		clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	return more;
 }
 
 void __qdisc_run(struct Qdisc *q)
-- 
cgit v1.2.3


From b85ab56c3f81c5a24b5a5213374f549df06430da Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 26 Mar 2018 15:08:33 -0700
Subject: llc: properly handle dev_queue_xmit() return value

llc_conn_send_pdu() pushes the skb into write queue and
calls llc_conn_send_pdus() to flush them out. However, the
status of dev_queue_xmit() is not returned to caller,
in this case, llc_conn_state_process().

llc_conn_state_process() needs hold the skb no matter
success or failure, because it still uses it after that,
therefore we should hold skb before dev_queue_xmit() when
that skb is the one being processed by llc_conn_state_process().

For other callers, they can just pass NULL and ignore
the return value as they are.

Reported-by: Noam Rathaus <noamr@beyondsecurity.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_conn.h |  2 +-
 net/llc/llc_c_ac.c     | 15 +++++++++------
 net/llc/llc_conn.c     | 32 +++++++++++++++++++++++---------
 3 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index fe994d2e5286..5c40f118c0fa 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -103,7 +103,7 @@ void llc_sk_reset(struct sock *sk);
 
 /* Access to a connection */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index f59648018060..163121192aca 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -389,7 +389,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
 	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -916,7 +916,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 	llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
 	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
 	if (likely(!rc)) {
-		llc_conn_send_pdu(sk, skb);
+		rc = llc_conn_send_pdu(sk, skb);
 		llc_conn_ac_inc_vs_by_1(sk, skb);
 	}
 	return rc;
@@ -935,14 +935,17 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
 int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb)
 {
 	struct llc_sock *llc = llc_sk(sk);
+	int ret;
 
 	if (llc->ack_must_be_send) {
-		llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
+		ret = llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
 		llc->ack_must_be_send = 0 ;
 		llc->ack_pf = 0;
-	} else
-		llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
-	return 0;
+	} else {
+		ret = llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
+	}
+
+	return ret;
 }
 
 /**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 9177dbb16dce..110e32bcb399 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
 #endif
 
 static int llc_find_offset(int state, int ev_type);
-static void llc_conn_send_pdus(struct sock *sk);
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
 static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
 static int llc_exec_conn_trans_actions(struct sock *sk,
 				       struct llc_conn_state_trans *trans,
@@ -193,11 +193,11 @@ out_skb_put:
 	return rc;
 }
 
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
 {
 	/* queue PDU to send to MAC layer */
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	llc_conn_send_pdus(sk);
+	return llc_conn_send_pdus(sk, skb);
 }
 
 /**
@@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
 	if (howmany_resend > 0)
 		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
 	/* any PDUs to re-send are queued up; start sending to MAC */
-	llc_conn_send_pdus(sk);
+	llc_conn_send_pdus(sk, NULL);
 out:;
 }
 
@@ -340,12 +340,16 @@ out:
 /**
  *	llc_conn_send_pdus - Sends queued PDUs
  *	@sk: active connection
+ *	@hold_skb: the skb held by caller, or NULL if does not care
  *
- *	Sends queued pdus to MAC layer for transmission.
+ *	Sends queued pdus to MAC layer for transmission. When @hold_skb is
+ *	NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
+ *	successfully, or 1 for failure.
  */
-static void llc_conn_send_pdus(struct sock *sk)
+static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
 {
 	struct sk_buff *skb;
+	int ret = 0;
 
 	while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -357,10 +361,20 @@ static void llc_conn_send_pdus(struct sock *sk)
 			skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
 			if (!skb2)
 				break;
-			skb = skb2;
+			dev_queue_xmit(skb2);
+		} else {
+			bool is_target = skb == hold_skb;
+			int rc;
+
+			if (is_target)
+				skb_get(skb);
+			rc = dev_queue_xmit(skb);
+			if (is_target)
+				ret = rc;
 		}
-		dev_queue_xmit(skb);
 	}
+
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3