From 65c5189a2b57b9aa1d89e4b79da39928257c9505 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Nov 2013 08:57:26 -0800 Subject: pkt_sched: fq: warn users using defrate Commit 7eec4174ff29 ("pkt_sched: fq: fix non TCP flows pacing") obsoleted TCA_FQ_FLOW_DEFAULT_RATE without notice for the users. Suggested by David Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 307f293477e8..885001b62c83 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -763,9 +763,7 @@ enum { TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ - TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, - * use the following rate - */ + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ -- cgit v1.2.3 From f52ed89971adbe79b6438c459814034707b8ab91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Nov 2013 08:58:14 -0800 Subject: pkt_sched: fq: fix pacing for small frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For performance reasons, sch_fq tried hard to not setup timers for every sent packet, using a quantum based heuristic : A delay is setup only if the flow exhausted its credit. Problem is that application limited flows can refill their credit for every queued packet, and they can evade pacing. This problem can also be triggered when TCP flows use small MSS values, as TSO auto sizing builds packets that are smaller than the default fq quantum (3028 bytes) This patch adds a 40 ms delay to guard flow credit refill. Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Cc: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 3 +++ net/sched/sch_fq.c | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 885001b62c83..a806687ad98f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -768,6 +768,9 @@ enum { TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 26906ab51c52..95d843961907 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -88,6 +88,7 @@ struct fq_sched_data { struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; u32 initial_quantum; + u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ struct rb_root *fq_root; @@ -114,6 +115,7 @@ static struct fq_flow detached, throttled; static void fq_flow_set_detached(struct fq_flow *f) { f->next = &detached; + f->age = jiffies; } static bool fq_flow_is_detached(const struct fq_flow *f) @@ -366,17 +368,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch) } f->qlen++; - flow_queue_add(f, skb); if (skb_is_retransmit(skb)) q->stat_tcp_retrans++; sch->qstats.backlog += qdisc_pkt_len(skb); if (fq_flow_is_detached(f)) { fq_flow_add_tail(&q->new_flows, f); - if (q->quantum > f->credit) - f->credit = q->quantum; + if (time_after(jiffies, f->age + q->flow_refill_delay)) + f->credit = max_t(u32, f->credit, q->quantum); q->inactive_flows--; qdisc_unthrottled(sch); } + + /* Note: this overwrites f->age */ + flow_queue_add(f, skb); + if (unlikely(f == &q->internal)) { q->stat_internal_packets++; qdisc_unthrottled(sch); @@ -454,7 +459,6 @@ begin: fq_flow_add_tail(&q->old_flows, f); } else { fq_flow_set_detached(f); - f->age = jiffies; q->inactive_flows++; } goto begin; @@ -608,6 +612,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 }, [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -664,6 +669,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) err = -EINVAL; } + if (tb[TCA_FQ_FLOW_REFILL_DELAY]) { + u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ; + + q->flow_refill_delay = usecs_to_jiffies(usecs_delay); + } + if (!err) err = fq_resize(q, fq_log); @@ -699,6 +710,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->flow_plimit = 100; q->quantum = 2 * psched_mtu(qdisc_dev(sch)); q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); + q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; q->rate_enable = 1; q->new_flows.first = NULL; @@ -733,6 +745,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || + nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, + jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; -- cgit v1.2.3 From 2ecf7536b2787580616d23b6507005d930975ca0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Nov 2013 15:19:33 +0100 Subject: quota/genetlink: use proper genetlink multicast APIs The quota code is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make the quota code use the correct API, but since this is already used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. Acked-by: Jan Kara Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- fs/quota/netlink.c | 17 +++++++++++++++-- include/uapi/linux/genetlink.h | 1 + net/netlink/genetlink.c | 10 ++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 16e8abb7709b..aa22fe03b76c 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -11,13 +11,23 @@ /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, + /* + * Needed due to multicast group ID abuse - old code assumed + * the family ID was also a valid multicast group ID (which + * isn't true) and userspace might thus rely on it. Assign a + * static ID for this group to make dealing with that easier. + */ + .id = GENL_ID_VFS_DQUOT, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, .maxattr = QUOTA_NL_A_MAX, }; +static struct genl_multicast_group quota_mcgrp = { + .name = "events", +}; + /** * quota_send_warning - Send warning to userspace about exceeded quota * @type: The quota type: USRQQUOTA, GRPQUOTA,... @@ -78,7 +88,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, goto attr_err_out; genlmsg_end(skb, msg_head); - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + genlmsg_multicast(skb, 0, quota_mcgrp.id, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); @@ -92,6 +102,9 @@ static int __init quota_init(void) if (genl_register_family("a_genl_family) != 0) printk(KERN_ERR "VFS: Failed to create quota netlink interface.\n"); + if (genl_register_mc_group("a_genl_family, "a_mcgrp)) + printk(KERN_ERR + "VFS: Failed to register quota mcast group.\n"); return 0; }; diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index c880a417d8a9..1af72d8228e0 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -27,6 +27,7 @@ struct genlmsghdr { */ #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) /************************************************************************** * Controller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 353909d46dda..bee91a7527a5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -69,8 +69,11 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * abuses the API and thinks it can statically use group 1. * That group will typically conflict with other groups that * any proper users use. + * Bit 17 is marked as already used since the VFS quota code + * also abused this API and relied on family == group ID, we + * cater to that by giving it a static family and group ID. */ -static unsigned long mc_group_start = 0x3; +static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_VFS_DQUOT); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -130,7 +133,8 @@ static u16 genl_generate_id(void) int i; for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { - if (!genl_family_find_byid(id_gen_idx)) + if (id_gen_idx != GENL_ID_VFS_DQUOT && + !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) id_gen_idx = GENL_MIN_ID; @@ -169,6 +173,8 @@ int genl_register_mc_group(struct genl_family *family, id = GENL_ID_CTRL; else if (strcmp(family->name, "NET_DM") == 0) id = 1; + else if (strcmp(family->name, "VFS_DQUOT") == 0) + id = GENL_ID_VFS_DQUOT; else id = find_first_zero_bit(mc_groups, mc_groups_longs * BITS_PER_LONG); -- cgit v1.2.3