mac80211: implement fair queueing per txq

mac80211's software queues were designed to work very closely with device tx queues. They are required to make use of 802.11 packet aggregation easily and efficiently. Due to the way 802.11 aggregation is designed it only makes sense to keep fair queuing as close to hardware as possible to reduce induced latency and inertia and provide the best flow responsiveness. This change doesn't translate directly to immediate and significant gains. End result depends on driver's induced latency. Best results can be achieved if driver keeps its own tx queue/fifo fill level to a minimum. Signed-off-by: Michal Kazior <michal.kazior@tieto.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
author: Michal Kazior <michal.kazior@tieto.com> 2016-05-19 10:37:49 +0200
committer: Johannes Berg <johannes.berg@intel.com> 2016-06-09 11:34:08 +0200
commit: fa962b92120bb70693a4db545f89067eb3373294 (patch)
tree: c8e159c99501f312756ad295088c884ef2a2361c /net/mac80211/tx.c
parent: 80a83cfc434b1e3afe38974570b460db4898bec6 (diff)
download: lwn-fa962b92120bb70693a4db545f89067eb3373294.tar.gz
lwn-fa962b92120bb70693a4db545f89067eb3373294.zip
1 files changed, 115 insertions, 21 deletions
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3e77da195ce8..1d8343fca6d4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -25,6 +25,7 @@
 #include <net/cfg80211.h>
 #include <net/mac80211.h>
 #include <asm/unaligned.h>
+#include <net/fq_impl.h>
 
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -1266,46 +1267,121 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 	return to_txq_info(txq);
 }
 
+static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
+					   struct fq_tin *tin,
+					   struct fq_flow *flow)
+{
+	return fq_flow_dequeue(fq, flow);
+}
+
+static void fq_skb_free_func(struct fq *fq,
+			     struct fq_tin *tin,
+			     struct fq_flow *flow,
+			     struct sk_buff *skb)
+{
+	struct ieee80211_local *local;
+
+	local = container_of(fq, struct ieee80211_local, fq);
+	ieee80211_free_txskb(&local->hw, skb);
+}
+
+static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
+						struct fq_tin *tin,
+						int idx,
+						struct sk_buff *skb)
+{
+	struct txq_info *txqi;
+
+	txqi = container_of(tin, struct txq_info, tin);
+	return &txqi->def_flow;
+}
+
 static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 				  struct txq_info *txqi,
 				  struct sk_buff *skb)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txqi->txq.vif);
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
 
-	lockdep_assert_held(&txqi->queue.lock);
+	fq_tin_enqueue(fq, tin, skb,
+		       fq_skb_free_func,
+		       fq_flow_get_default_func);
+}
 
-	if (atomic_read(&sdata->num_tx_queued) >= TOTAL_MAX_TX_BUFFER ||
-	    txqi->queue.qlen >= STA_MAX_TX_BUFFER) {
-		ieee80211_free_txskb(&local->hw, skb);
-		return;
+void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
+			struct sta_info *sta,
+			struct txq_info *txqi, int tid)
+{
+	fq_tin_init(&txqi->tin);
+	fq_flow_init(&txqi->def_flow);
+
+	txqi->txq.vif = &sdata->vif;
+
+	if (sta) {
+		txqi->txq.sta = &sta->sta;
+		sta->sta.txq[tid] = &txqi->txq;
+		txqi->txq.tid = tid;
+		txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
+	} else {
+		sdata->vif.txq = &txqi->txq;
+		txqi->txq.tid = 0;
+		txqi->txq.ac = IEEE80211_AC_BE;
 	}
+}
 
-	atomic_inc(&sdata->num_tx_queued);
-	txqi->byte_cnt += skb->len;
-	__skb_queue_tail(&txqi->queue, skb);
+void ieee80211_txq_purge(struct ieee80211_local *local,
+			 struct txq_info *txqi)
+{
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
+
+	fq_tin_reset(fq, tin, fq_skb_free_func);
+}
+
+int ieee80211_txq_setup_flows(struct ieee80211_local *local)
+{
+	struct fq *fq = &local->fq;
+	int ret;
+
+	if (!local->ops->wake_tx_queue)
+		return 0;
+
+	ret = fq_init(fq, 4096);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
+{
+	struct fq *fq = &local->fq;
+
+	if (!local->ops->wake_tx_queue)
+		return;
+
+	fq_reset(fq, fq_skb_free_func);
 }
 
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 				     struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
 	struct txq_info *txqi = container_of(txq, struct txq_info, txq);
 	struct ieee80211_hdr *hdr;
 	struct sk_buff *skb = NULL;
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
 
-	spin_lock_bh(&txqi->queue.lock);
+	spin_lock_bh(&fq->lock);
 
 	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
 		goto out;
 
-	skb = __skb_dequeue(&txqi->queue);
+	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
 	if (!skb)
 		goto out;
 
-	atomic_dec(&sdata->num_tx_queued);
-	txqi->byte_cnt -= skb->len;
-
 	hdr = (struct ieee80211_hdr *)skb->data;
 	if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
 		struct sta_info *sta = container_of(txq->sta, struct sta_info,
@@ -1320,7 +1396,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	}
 
 out:
-	spin_unlock_bh(&txqi->queue.lock);
+	spin_unlock_bh(&fq->lock);
 
 	if (skb && skb_has_frag_list(skb) &&
 	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
@@ -1337,6 +1413,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
 			       bool txpending)
 {
 	struct ieee80211_tx_control control = {};
+	struct fq *fq = &local->fq;
 	struct sk_buff *skb, *tmp;
 	struct txq_info *txqi;
 	unsigned long flags;
@@ -1359,9 +1436,9 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
 
 			__skb_unlink(skb, skbs);
 
-			spin_lock_bh(&txqi->queue.lock);
+			spin_lock_bh(&fq->lock);
 			ieee80211_txq_enqueue(local, txqi, skb);
-			spin_unlock_bh(&txqi->queue.lock);
+			spin_unlock_bh(&fq->lock);
 
 			drv_wake_tx_queue(local, txqi);
 
@@ -2893,6 +2970,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 				      struct sk_buff *skb)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin;
+	struct fq_flow *flow;
 	u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 	struct ieee80211_txq *txq = sta->sta.txq[tid];
 	struct txq_info *txqi;
@@ -2904,6 +2984,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	__be16 len;
 	void *data;
 	bool ret = false;
+	unsigned int orig_len;
 	int n = 1, nfrags;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
@@ -2920,12 +3001,20 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 		max_amsdu_len = min_t(int, max_amsdu_len,
 				      sta->sta.max_rc_amsdu_len);
 
-	spin_lock_bh(&txqi->queue.lock);
+	spin_lock_bh(&fq->lock);
 
-	head = skb_peek_tail(&txqi->queue);
+	/* TODO: Ideally aggregation should be done on dequeue to remain
+	 * responsive to environment changes.
+	 */
+
+	tin = &txqi->tin;
+	flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
+	head = skb_peek_tail(&flow->queue);
 	if (!head)
 		goto out;
 
+	orig_len = head->len;
+
 	if (skb->len + head->len > max_amsdu_len)
 		goto out;
 
@@ -2964,8 +3053,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	head->data_len += skb->len;
 	*frag_tail = skb;
 
+	flow->backlog += head->len - orig_len;
+	tin->backlog_bytes += head->len - orig_len;
+
+	fq_recalc_backlog(fq, tin, flow);
+
 out:
-	spin_unlock_bh(&txqi->queue.lock);
+	spin_unlock_bh(&fq->lock);
 
 	return ret;
 }
author	Michal Kazior <michal.kazior@tieto.com>	2016-05-19 10:37:49 +0200
committer	Johannes Berg <johannes.berg@intel.com>	2016-06-09 11:34:08 +0200
commit	fa962b92120bb70693a4db545f89067eb3373294 (patch)
tree	c8e159c99501f312756ad295088c884ef2a2361c /net/mac80211/tx.c
parent	80a83cfc434b1e3afe38974570b460db4898bec6 (diff)
download	lwn-fa962b92120bb70693a4db545f89067eb3373294.tar.gz lwn-fa962b92120bb70693a4db545f89067eb3373294.zip