summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cong Wang <xiyou.wangcong@gmail.com> 2019-05-01 19:56:59 -0700
committer David S. Miller <davem@davemloft.net> 2019-05-04 00:41:41 -0400
commit 141b6b2ad75d92770240de3af98d55c41ce7cd18 (patch)
tree c6b302bf24d04bd8a3d294d2df858f60d15116e2
parent f3f050a4df355c398f70e3788360a1262ac0c5df (diff)
downloadlwn-141b6b2ad75d92770240de3af98d55c41ce7cd18.tar.gz
lwn-141b6b2ad75d92770240de3af98d55c41ce7cd18.zip
net: add a generic tracepoint for TX queue timeout
Although devlink health report does a nice job of reporting TX timeouts and other NIC errors, it unfortunately requires driver support, and currently only mlx5 has implemented it. Until other drivers catch up, it is useful to have a generic tracepoint to monitor this kind of TX timeout.

We have been suffering from TX timeouts with different drivers, and we plan to start monitoring them with rasdaemon, which just needs a new tracepoint.

Sample output:

  ksoftirqd/1-16 [001] ..s2 144.043173: net_dev_xmit_timeout: dev=ens3 driver=e1000 queue=0

Cc: Eran Ben Elisha <eranbe@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/trace/events/net.h 23
-rw-r--r-- net/sched/sch_generic.c 2
2 files changed, 25 insertions, 0 deletions
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 1efd7d9b25fe..2399073c3afc 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit,
__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
);
+TRACE_EVENT(net_dev_xmit_timeout,
+ /* TX queue timeout event; this patch fires it from dev_watchdog() in net/sched/sch_generic.c. */
+ TP_PROTO(struct net_device *dev,
+ int queue_index),
+
+ TP_ARGS(dev, queue_index),
+
+ TP_STRUCT__entry(
+ __string( name, dev->name ) /* device name, copied from dev->name */
+ __string( driver, netdev_drivername(dev)) /* string returned by netdev_drivername(dev) */
+ __field( int, queue_index ) /* queue index passed in by the caller */
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, dev->name);
+ __assign_str(driver, netdev_drivername(dev));
+ __entry->queue_index = queue_index;
+ ),
+
+ TP_printk("dev=%s driver=%s queue=%d", /* e.g. "dev=ens3 driver=e1000 queue=0" */
+ __get_str(name), __get_str(driver), __entry->queue_index)
+);
+
DECLARE_EVENT_CLASS(net_dev_template,
TP_PROTO(struct sk_buff *skb),
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 848aab3693bd..cce1e9ee85af 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -32,6 +32,7 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
+#include <trace/events/net.h>
#include <net/xfrm.h>
/* Qdisc to use by default */
@@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t)
}
if (some_queue_timedout) {
+ trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
dev->netdev_ops->ndo_tx_timeout(dev);