summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--net/sched/sch_cake.c439
1 files changed, 423 insertions, 16 deletions
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 633ca1578114..43eeca81b247 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -296,6 +296,68 @@ static void cobalt_set_enqueue_time(struct sk_buff *skb,
static u16 quantum_div[CAKE_QUEUES + 1] = {0};
+/* Diffserv lookup tables */
+
+static const u8 precedence[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const u8 diffserv8[] = {
+ 2, 5, 1, 2, 4, 2, 2, 2,
+ 0, 2, 1, 2, 1, 2, 1, 2,
+ 5, 2, 4, 2, 4, 2, 4, 2,
+ 3, 2, 3, 2, 3, 2, 3, 2,
+ 6, 2, 3, 2, 3, 2, 3, 2,
+ 6, 2, 2, 2, 6, 2, 6, 2,
+ 7, 2, 2, 2, 2, 2, 2, 2,
+ 7, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const u8 diffserv4[] = {
+ 0, 2, 0, 0, 2, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0,
+ 2, 0, 2, 0, 2, 0, 2, 0,
+ 3, 0, 2, 0, 2, 0, 2, 0,
+ 3, 0, 0, 0, 3, 0, 3, 0,
+ 3, 0, 0, 0, 0, 0, 0, 0,
+ 3, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 diffserv3[] = {
+ 0, 0, 0, 0, 2, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 2, 0, 2, 0,
+ 2, 0, 0, 0, 0, 0, 0, 0,
+ 2, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 besteffort[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* tin priority order for stats dumping */
+
+static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const u8 bulk_order[] = {1, 0, 2, 3};
+
#define REC_INV_SQRT_CACHE (16)
static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
@@ -1351,20 +1413,91 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data *t,
+static void cake_wash_diffserv(struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ break;
+ case htons(ETH_P_IPV6):
+ ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ break;
+ default:
+ break;
+ }
+}
+
+static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+{
+ u8 dscp;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ if (wash && dscp)
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ return dscp;
+
+ case htons(ETH_P_IPV6):
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+ if (wash && dscp)
+ ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ return dscp;
+
+ case htons(ETH_P_ARP):
+ return 0x38; /* CS7 - Net Control */
+
+ default:
+ /* If there is no Diffserv field, treat as best-effort */
+ return 0;
+ }
+}
+
+static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
+ struct sk_buff *skb)
+{
+ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 tin;
+
+ if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= q->tin_cnt) {
+ tin = TC_H_MIN(skb->priority) - 1;
+
+ if (q->rate_flags & CAKE_FLAG_WASH)
+ cake_wash_diffserv(skb);
+ } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+ /* extract the Diffserv Precedence field, if it exists */
+ /* and clear DSCP bits if washing */
+ tin = q->tin_index[cake_handle_diffserv(skb,
+ q->rate_flags & CAKE_FLAG_WASH)];
+ if (unlikely(tin >= q->tin_cnt))
+ tin = 0;
+ } else {
+ tin = 0;
+ if (q->rate_flags & CAKE_FLAG_WASH)
+ cake_wash_diffserv(skb);
+ }
+
+ return &q->tins[tin];
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
struct sk_buff *skb, int flow_mode, int *qerr)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct tcf_proto *filter;
struct tcf_result res;
+ u32 flow = 0;
int result;
filter = rcu_dereference_bh(q->filter_list);
if (!filter)
- return cake_hash(t, skb, flow_mode) + 1;
+ goto hash;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tcf_classify(skb, filter, &res, false);
+
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1378,9 +1511,11 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data *t,
}
#endif
if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
- return TC_H_MIN(res.classid);
+ flow = TC_H_MIN(res.classid);
}
- return 0;
+hash:
+ *t = cake_select_tin(sch, skb);
+ return flow ?: cake_hash(*t, skb, flow_mode) + 1;
}
static void cake_reconfigure(struct Qdisc *sch);
@@ -1395,13 +1530,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
ktime_t now = ktime_get();
struct cake_tin_data *b;
struct cake_flow *flow;
- u32 idx, tin;
-
- tin = 0;
- b = &q->tins[tin];
+ u32 idx;
/* choose flow to insert into */
- idx = cake_classify(sch, b, skb, q->flow_mode, &ret);
+ idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
if (idx == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
@@ -1917,18 +2049,275 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->cparams.p_dec = 1 << 20; /* 1/4096 */
}
-static void cake_reconfigure(struct Qdisc *sch)
+static int cake_config_besteffort(struct Qdisc *sch)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[0];
- int c, ft = 0;
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+ u64 rate = q->rate_bps;
q->tin_cnt = 1;
- cake_set_rate(b, q->rate_bps, psched_mtu(qdisc_dev(sch)),
+
+ q->tin_index = besteffort;
+ q->tin_order = normal_order;
+
+ cake_set_rate(b, rate, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
b->tin_quantum_band = 65535;
b->tin_quantum_prio = 65535;
+ return 0;
+}
+
+static int cake_config_precedence(struct Qdisc *sch)
+{
+ /* convert high-level (user visible) parameters into internal format */
+ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+ u64 rate = q->rate_bps;
+ u32 quantum1 = 256;
+ u32 quantum2 = 256;
+ u32 i;
+
+ q->tin_cnt = 8;
+ q->tin_index = precedence;
+ q->tin_order = normal_order;
+
+ for (i = 0; i < q->tin_cnt; i++) {
+ struct cake_tin_data *b = &q->tins[i];
+
+ cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+ us_to_ns(q->interval));
+
+ b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+ b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+ /* calculate next class's parameters */
+ rate *= 7;
+ rate >>= 3;
+
+ quantum1 *= 3;
+ quantum1 >>= 1;
+
+ quantum2 *= 7;
+ quantum2 >>= 3;
+ }
+
+ return 0;
+}
+
+/* List of known Diffserv codepoints:
+ *
+ * Least Effort (CS1)
+ * Best Effort (CS0)
+ * Max Reliability & LLT "Lo" (TOS1)
+ * Max Throughput (TOS2)
+ * Min Delay (TOS4)
+ * LLT "La" (TOS5)
+ * Assured Forwarding 1 (AF1x) - x3
+ * Assured Forwarding 2 (AF2x) - x3
+ * Assured Forwarding 3 (AF3x) - x3
+ * Assured Forwarding 4 (AF4x) - x3
+ * Precedence Class 2 (CS2)
+ * Precedence Class 3 (CS3)
+ * Precedence Class 4 (CS4)
+ * Precedence Class 5 (CS5)
+ * Precedence Class 6 (CS6)
+ * Precedence Class 7 (CS7)
+ * Voice Admit (VA)
+ * Expedited Forwarding (EF)
+
+ * Total 25 codepoints.
+ */
+
+/* List of traffic classes in RFC 4594:
+ * (roughly descending order of contended priority)
+ * (roughly ascending order of uncontended throughput)
+ *
+ * Network Control (CS6,CS7) - routing traffic
+ * Telephony (EF,VA) - aka. VoIP streams
+ * Signalling (CS5) - VoIP setup
+ * Multimedia Conferencing (AF4x) - aka. video calls
+ * Realtime Interactive (CS4) - eg. games
+ * Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch
+ * Broadcast Video (CS3)
+ * Low Latency Data (AF2x,TOS4) - eg. database
+ * Ops, Admin, Management (CS2,TOS1) - eg. ssh
+ * Standard Service (CS0 & unrecognised codepoints)
+ * High Throughput Data (AF1x,TOS2) - eg. web traffic
+ * Low Priority Data (CS1) - eg. BitTorrent
+
+ * Total 12 traffic classes.
+ */
+
+static int cake_config_diffserv8(struct Qdisc *sch)
+{
+/* Pruned list of traffic classes for typical applications:
+ *
+ * Network Control (CS6, CS7)
+ * Minimum Latency (EF, VA, CS5, CS4)
+ * Interactive Shell (CS2, TOS1)
+ * Low Latency Transactions (AF2x, TOS4)
+ * Video Streaming (AF4x, AF3x, CS3)
+ * Bog Standard (CS0 etc.)
+ * High Throughput (AF1x, TOS2)
+ * Background Traffic (CS1)
+ *
+ * Total 8 traffic classes.
+ */
+
+ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+ u64 rate = q->rate_bps;
+ u32 quantum1 = 256;
+ u32 quantum2 = 256;
+ u32 i;
+
+ q->tin_cnt = 8;
+
+ /* codepoint to class mapping */
+ q->tin_index = diffserv8;
+ q->tin_order = normal_order;
+
+ /* class characteristics */
+ for (i = 0; i < q->tin_cnt; i++) {
+ struct cake_tin_data *b = &q->tins[i];
+
+ cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+ us_to_ns(q->interval));
+
+ b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+ b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+ /* calculate next class's parameters */
+ rate *= 7;
+ rate >>= 3;
+
+ quantum1 *= 3;
+ quantum1 >>= 1;
+
+ quantum2 *= 7;
+ quantum2 >>= 3;
+ }
+
+ return 0;
+}
+
+static int cake_config_diffserv4(struct Qdisc *sch)
+{
+/* Further pruned list of traffic classes for four-class system:
+ *
+ * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4)
+ * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
+ * Best Effort (CS0, AF1x, TOS2, and those not specified)
+ * Background Traffic (CS1)
+ *
+ * Total 4 traffic classes.
+ */
+
+ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+ u64 rate = q->rate_bps;
+ u32 quantum = 1024;
+
+ q->tin_cnt = 4;
+
+ /* codepoint to class mapping */
+ q->tin_index = diffserv4;
+ q->tin_order = bulk_order;
+
+ /* class characteristics */
+ cake_set_rate(&q->tins[0], rate, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+ cake_set_rate(&q->tins[1], rate >> 4, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+ cake_set_rate(&q->tins[2], rate >> 1, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+ cake_set_rate(&q->tins[3], rate >> 2, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+
+ /* priority weights */
+ q->tins[0].tin_quantum_prio = quantum;
+ q->tins[1].tin_quantum_prio = quantum >> 4;
+ q->tins[2].tin_quantum_prio = quantum << 2;
+ q->tins[3].tin_quantum_prio = quantum << 4;
+
+ /* bandwidth-sharing weights */
+ q->tins[0].tin_quantum_band = quantum;
+ q->tins[1].tin_quantum_band = quantum >> 4;
+ q->tins[2].tin_quantum_band = quantum >> 1;
+ q->tins[3].tin_quantum_band = quantum >> 2;
+
+ return 0;
+}
+
+static int cake_config_diffserv3(struct Qdisc *sch)
+{
+/* Simplified Diffserv structure with 3 tins.
+ * Low Priority (CS1)
+ * Best Effort
+ * Latency Sensitive (TOS4, VA, EF, CS6, CS7)
+ */
+ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+ u64 rate = q->rate_bps;
+ u32 quantum = 1024;
+
+ q->tin_cnt = 3;
+
+ /* codepoint to class mapping */
+ q->tin_index = diffserv3;
+ q->tin_order = bulk_order;
+
+ /* class characteristics */
+ cake_set_rate(&q->tins[0], rate, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+ cake_set_rate(&q->tins[1], rate >> 4, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+ cake_set_rate(&q->tins[2], rate >> 2, mtu,
+ us_to_ns(q->target), us_to_ns(q->interval));
+
+ /* priority weights */
+ q->tins[0].tin_quantum_prio = quantum;
+ q->tins[1].tin_quantum_prio = quantum >> 4;
+ q->tins[2].tin_quantum_prio = quantum << 4;
+
+ /* bandwidth-sharing weights */
+ q->tins[0].tin_quantum_band = quantum;
+ q->tins[1].tin_quantum_band = quantum >> 4;
+ q->tins[2].tin_quantum_band = quantum >> 2;
+
+ return 0;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+ struct cake_sched_data *q = qdisc_priv(sch);
+ int c, ft;
+
+ switch (q->tin_mode) {
+ case CAKE_DIFFSERV_BESTEFFORT:
+ ft = cake_config_besteffort(sch);
+ break;
+
+ case CAKE_DIFFSERV_PRECEDENCE:
+ ft = cake_config_precedence(sch);
+ break;
+
+ case CAKE_DIFFSERV_DIFFSERV8:
+ ft = cake_config_diffserv8(sch);
+ break;
+
+ case CAKE_DIFFSERV_DIFFSERV4:
+ ft = cake_config_diffserv4(sch);
+ break;
+
+ case CAKE_DIFFSERV_DIFFSERV3:
+ default:
+ ft = cake_config_diffserv3(sch);
+ break;
+ }
+
for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
cake_clear_tin(sch, c);
q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
@@ -1984,6 +2373,16 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_CAKE_BASE_RATE64])
q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+ if (tb[TCA_CAKE_DIFFSERV_MODE])
+ q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+
+ if (tb[TCA_CAKE_WASH]) {
+ if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
+ q->rate_flags |= CAKE_FLAG_WASH;
+ else
+ q->rate_flags &= ~CAKE_FLAG_WASH;
+ }
+
if (tb[TCA_CAKE_FLOW_MODE])
q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
@@ -2048,7 +2447,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
int i, j, err;
sch->limit = 10240;
- q->tin_mode = CAKE_DIFFSERV_BESTEFFORT;
+ q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
q->flow_mode = CAKE_FLOW_TRIPLE;
q->rate_bps = 0; /* unlimited by default */
@@ -2158,6 +2557,13 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_CAKE_WASH,
+ !!(q->rate_flags & CAKE_FLAG_WASH)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -2211,7 +2617,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
} while (0)
for (i = 0; i < q->tin_cnt; i++) {
- struct cake_tin_data *b = &q->tins[i];
+ struct cake_tin_data *b = &q->tins[q->tin_order[i]];
ts = nla_nest_start(d->skb, i + 1);
if (!ts)
@@ -2310,7 +2716,8 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
u32 idx = cl - 1;
if (idx < CAKE_QUEUES * q->tin_cnt) {
- const struct cake_tin_data *b = &q->tins[idx / CAKE_QUEUES];
+ const struct cake_tin_data *b = \
+ &q->tins[q->tin_order[idx / CAKE_QUEUES]];
const struct sk_buff *skb;
flow = &b->flows[idx % CAKE_QUEUES];
@@ -2382,7 +2789,7 @@ static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < q->tin_cnt; i++) {
- struct cake_tin_data *b = &q->tins[i];
+ struct cake_tin_data *b = &q->tins[q->tin_order[i]];
for (j = 0; j < CAKE_QUEUES; j++) {
if (list_empty(&b->flows[j].flowchain) ||