author     Claudiu Manoil <claudiu.manoil@nxp.com>    2020-07-21 10:55:22 +0300
committer  David S. Miller <davem@davemloft.net>      2020-07-21 15:38:30 -0700
commit     ae0e6a5d16271f3a773bded360fa0eb0bdd25b10 (patch)
tree       87c3f807700e7faf857c0f218d485693d79a277a /drivers/net/ethernet/freescale/enetc/enetc.h
parent     915710812ba0ff11f49aa810025f91bbad8da20a (diff)
enetc: Add adaptive interrupt coalescing
Use the generic dynamic interrupt moderation (dim) framework to
implement adaptive interrupt coalescing on Rx. With the per-packet
interrupt scheme, a high interrupt rate has been noted for moderate
traffic flows, leading to high CPU utilization. The 'dim' scheme
implemented by the current patch addresses this issue, improving CPU
utilization while using minimal coalescing time thresholds in order
to preserve good latency.
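For reference, below is a minimal sketch of how a driver typically
hooks into the dim framework. The dim calls (dim_update_sample(),
net_dim(), net_dim_get_rx_moderation()) are the generic API from
<linux/dim.h>; the vector layout and function names are illustrative
assumptions, not code from this patch:

	#include <linux/dim.h>
	#include <linux/netdevice.h>

	/* Hypothetical per-vector context; enetc keeps the equivalent
	 * state in struct enetc_int_vector (see the diff below).
	 */
	struct my_int_vector {
		struct napi_struct napi;
		struct dim rx_dim;
		u16 comp_cnt;	/* interrupt (completion) event count */
		u64 rx_packets;
		u64 rx_bytes;
	};

	/* Work handler invoked by the dim core once it settles on a new
	 * moderation profile; registered at init time with
	 * INIT_WORK(&v->rx_dim.work, my_rx_dim_work).
	 */
	static void my_rx_dim_work(struct work_struct *w)
	{
		struct dim *dim = container_of(w, struct dim, work);
		struct dim_cq_moder moder;

		moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
		/* program moder.usec / moder.pkts into the device here */
		dim->state = DIM_START_MEASURE;
	}

	/* Called from the NAPI poll loop to feed a traffic sample to the
	 * dim algorithm, which may then schedule the work item above.
	 */
	static void my_rx_net_dim(struct my_int_vector *v)
	{
		struct dim_sample dim_sample;

		dim_update_sample(v->comp_cnt, v->rx_packets, v->rx_bytes,
				  &dim_sample);
		net_dim(&v->rx_dim, dim_sample);
	}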
On the Tx side, use an optimized time threshold value by default.
This value has been tuned for Tx TCP streams at a rate of around
85 kpps on a 1G link, at which rate half of the Tx ring size (128
packets) gets filled in 1500 usecs. Scaling this down to 2.5G links
yields the current value of 600 usecs, which is conservative and
gives good enough results for 1G links too (see next).
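As a sanity check, the arithmetic behind those numbers can be
reconstructed as follows (illustrative constants only, not driver
code):

	/* At ~85 kpps, 128 packets (half of the default Tx ring, per
	 * ENETC_TXIC_PKTTHR) take 128 * 1000000 / 85000 ~= 1505 us to
	 * arrive on a 1G link.
	 */
	#define TX_PKTS_HALF_RING	128ULL
	#define TX_RATE_1G_PPS		85000ULL

	static const unsigned int tx_half_ring_fill_us =
		TX_PKTS_HALF_RING * 1000000ULL / TX_RATE_1G_PPS; /* ~1505 */

	/* Scaling 1G -> 2.5G divides the fill time by 2.5, giving ~602 us,
	 * rounded to the 600 us used for ENETC_TXIC_TIMETHR below.
	 */
	static const unsigned int tx_time_thr_us =
		tx_half_ring_fill_us * 2 / 5;			 /* ~602 */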
Below are some measurement results before and after this patch (and
its related dependencies), on a system with 2 ARM Cortex-A72 CPUs
@ 1.3 GHz (32 KB L1 data cache), using 60 sec long netperf TCP
stream tests @ 1 Gbit link (maximum throughput):
1) 1 Rx TCP flow, both Rx and Tx processed by the same NAPI
thread on the same CPU:
        CPU utilization       int rate (ints/sec)
Before: 50%-60% (over 50%)    92k
After:  13%-22%               3.5k-12k
Comment: Major CPU utilization improvement for a single Rx TCP
         flow (i.e. netperf -t TCP_MAERTS) on a single CPU.
         Usually settles under 16% for longer tests.
2) 4 Rx TCP flows + 4 Tx TCP flows (+ pings to check the latency):
        Total CPU utilization    Total int rate (ints/sec)
Before: ~80% (spikes to 90%)     ~100k
After:  60% (more steady)        ~4k
Comment: Significant improvement for this load test; the ping test
         outcome shows no notable difference compared to before.
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/freescale/enetc/enetc.h')
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 4e3af7f07892..d309803cfeb6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -10,6 +10,7 @@
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
+#include <linux/dim.h>
 
 #include "enetc_hw.h"
 
@@ -194,12 +195,15 @@ struct enetc_int_vector {
 	unsigned long tx_rings_map;
 	int count_tx_rings;
 	u32 rx_ictt;
-	struct napi_struct napi;
+	u16 comp_cnt;
+	bool rx_dim_en, rx_napi_work;
+	struct napi_struct napi ____cacheline_aligned_in_smp;
+	struct dim rx_dim ____cacheline_aligned_in_smp;
 	char name[ENETC_INT_NAME_MAX];
 
 	struct enetc_bdr rx_ring;
 	struct enetc_bdr tx_ring[];
-};
+} ____cacheline_aligned_in_smp;
 
 struct enetc_cls_rule {
 	struct ethtool_rx_flow_spec fs;
@@ -230,10 +234,13 @@ enum enetc_ic_mode {
 	/* activated when int coalescing time is set to a non-0 value */
 	ENETC_IC_RX_MANUAL = BIT(0),
 	ENETC_IC_TX_MANUAL = BIT(1),
+	/* use dynamic interrupt moderation */
+	ENETC_IC_RX_ADAPTIVE = BIT(2),
 };
 
 #define ENETC_RXIC_PKTTHR	min_t(u32, 256, ENETC_RX_RING_DEFAULT_SIZE / 2)
 #define ENETC_TXIC_PKTTHR	min_t(u32, 128, ENETC_TX_RING_DEFAULT_SIZE / 2)
+#define ENETC_TXIC_TIMETHR	enetc_usecs_to_cycles(600)
 
 struct enetc_ndev_priv {
 	struct net_device *ndev;
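The new ENETC_IC_RX_ADAPTIVE mode bit is the kind of flag a driver's
ethtool .set_coalesce handler maps from userspace's
use_adaptive_rx_coalesce setting. A hedged sketch of such a hookup
(handler name and the mode handling are assumptions, not part of this
diff):

	#include <linux/ethtool.h>
	#include <linux/netdevice.h>
	#include "enetc.h"	/* for the ENETC_IC_* mode bits */

	/* Hypothetical handler: translate the generic ethtool coalescing
	 * settings into the enetc_ic_mode bits defined above.
	 */
	static int my_set_coalesce(struct net_device *ndev,
				   struct ethtool_coalesce *ec)
	{
		u32 ic_mode = 0;

		if (ec->use_adaptive_rx_coalesce)
			ic_mode |= ENETC_IC_RX_ADAPTIVE;
		else if (ec->rx_coalesce_usecs)
			ic_mode |= ENETC_IC_RX_MANUAL;

		if (ec->tx_coalesce_usecs)
			ic_mode |= ENETC_IC_TX_MANUAL;

		/* store ic_mode in the driver's private state and
		 * reprogram the interrupt coalescing registers accordingly
		 */
		return 0;
	}

With such a hook in place, adaptive Rx coalescing is toggled from
userspace with something like: ethtool -C eth0 adaptive-rx on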