diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/Kconfig | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 277 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 34 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 195 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 126 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 274 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/fw.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/fw.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/intf.c | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/main.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mcg.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 72 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mr.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/port.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 22 |
15 files changed, 882 insertions, 157 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 9ca3734ebb6b..5098e7f21987 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -24,13 +24,6 @@ config MLX4_EN_DCB If unsure, set to Y -config MLX4_EN_VXLAN - bool "VXLAN offloads Support" - default y - depends on MLX4_EN && VXLAN && !(MLX4_EN=y && VXLAN=m) - ---help--- - Say Y here if you want to use VXLAN offloads in the driver. - config MLX4_CORE tristate depends on PCI diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index f01918c63f28..99c6bbdff501 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -37,6 +37,11 @@ #include "mlx4_en.h" #include "fw_qos.h" +enum { + MLX4_CEE_STATE_DOWN = 0, + MLX4_CEE_STATE_UP = 1, +}; + /* Definitions for QCN */ @@ -80,13 +85,202 @@ struct mlx4_congestion_control_mb_prio_802_1_qau_statistics { __be32 reserved3[4]; }; +static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + switch (capid) { + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->cee_params.dcbx_cap; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx4_max_tc(priv->mdev->dev); + break; + default: + *cap = false; + break; + } + + return 0; +} + +static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + return priv->cee_params.dcb_cfg.pfc_state; +} + +static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_params.dcb_cfg.pfc_state = state; +} + +static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + *setting = priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc; +} + +static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc = setting; + priv->cee_params.dcb_cfg.pfc_state = true; +} + +static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + if (!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return -EINVAL; + + if (tcid == DCB_NUMTCS_ATTR_PFC) + *num = mlx4_max_tc(priv->mdev->dev); + else + *num = 0; + + return 0; +} + +static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg; + int err = 0; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + if (dcb_cfg->pfc_state) { + int tc; + + priv->prof->rx_pause = 0; + priv->prof->tx_pause = 0; + for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) { + u8 tc_mask = 1 << tc; + + switch (dcb_cfg->tc_config[tc].dcb_pfc) { + case pfc_disabled: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_full: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + case pfc_enabled_tx: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_rx: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + default: + break; + } + } + en_dbg(DRV, priv, "Set pfc on\n"); + } else { + priv->prof->rx_pause = 1; + priv->prof->tx_pause = 1; + en_dbg(DRV, priv, "Set pfc off\n"); + } + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + if (err) + en_err(priv, "Failed setting pause params\n"); + return err; +} + +static u8 mlx4_en_dcbnl_get_state(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (priv->flags & MLX4_EN_FLAG_DCB_ENABLED) + return MLX4_CEE_STATE_UP; + + return MLX4_CEE_STATE_DOWN; +} + +static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int num_tcs = 0; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return 0; + + if (state) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + num_tcs = IEEE_8021QAZ_MAX_TCS; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + } + + return mlx4_en_setup_tc(dev, num_tcs); +} + +/* On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. + */ +static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 0; + + return dcb_getapp(netdev, &app); +} + +static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype, + u16 id, u8 up) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app; + + if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + memset(&app, 0, sizeof(struct dcb_app)); + app.selector = idtype; + app.protocol = id; + app.priority = up; + + return dcb_setapp(netdev, &app); +} + static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct mlx4_en_priv *priv = netdev_priv(dev); struct ieee_ets *my_ets = &priv->ets; - /* No IEEE PFC settings available */ if (!my_ets) return -EINVAL; @@ -237,18 +431,51 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) { - return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->cee_params.dcbx_cap; } static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) { + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets ets = {0}; + struct ieee_pfc pfc = {0}; + + if (mode == priv->cee_params.dcbx_cap) + return 0; + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || - (mode & DCB_CAP_DCBX_VER_CEE) || - !(mode & DCB_CAP_DCBX_VER_IEEE) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && + (mode & DCB_CAP_DCBX_VER_CEE)) || !(mode & DCB_CAP_DCBX_HOST)) - return 1; + goto err; + + priv->cee_params.dcbx_cap = mode; + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS; + + if (mode & DCB_CAP_DCBX_VER_IEEE) { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + } else if (mode & DCB_CAP_DCBX_VER_CEE) { + if (mlx4_en_dcbnl_set_all(dev)) + goto err; + } else { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + if (mlx4_en_setup_tc(dev, 0)) + goto err; + } return 0; +err: + return 1; } #define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */ @@ -463,24 +690,46 @@ static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev, } const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { - .ieee_getets = mlx4_en_dcbnl_ieee_getets, - .ieee_setets = mlx4_en_dcbnl_ieee_setets, - .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, - .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, - .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, - .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .ieee_getets = mlx4_en_dcbnl_ieee_getets, + .ieee_setets = mlx4_en_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, + .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, + .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, + .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getstate = mlx4_en_dcbnl_get_state, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getcap = mlx4_en_dcbnl_getcap, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, - .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, - .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, - .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, }; const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, + .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 44cf16d01f42..bdda17d2ea0f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1112,7 +1112,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - return priv->rx_ring_num; + return rounddown_pow_of_two(priv->rx_ring_num); } static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) @@ -1146,19 +1146,17 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rss_map *rss_map = &priv->rss_map; - int rss_rings; - size_t n = priv->rx_ring_num; + u32 n = mlx4_en_get_rxfh_indir_size(dev); + u32 i, rss_rings; int err = 0; - rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; - rss_rings = 1 << ilog2(rss_rings); + rss_rings = priv->prof->rss_rings ?: n; + rss_rings = rounddown_pow_of_two(rss_rings); - while (n--) { + for (i = 0; i < n; i++) { if (!ring_index) break; - ring_index[n] = rss_map->qps[n % rss_rings].qpn - - rss_map->base_qpn; + ring_index[i] = i % rss_rings; } if (key) memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); @@ -1171,6 +1169,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); + u32 n = mlx4_en_get_rxfh_indir_size(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; @@ -1180,18 +1179,18 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, /* Calculate RSS table size and make sure flows are spread evenly * between rings */ - for (i = 0; i < priv->rx_ring_num; i++) { + for (i = 0; i < n; i++) { if (!ring_index) - continue; + break; if (i > 0 && !ring_index[i] && !rss_rings) rss_rings = i; - if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) + if (ring_index[i] != (i % (rss_rings ?: n))) return -EINVAL; } if (!rss_rings) - rss_rings = priv->rx_ring_num; + rss_rings = n; /* RSS table size must be an order of 2 */ if (!is_power_of_2(rss_rings)) @@ -1730,6 +1729,12 @@ static int mlx4_en_set_channels(struct net_device *dev, !channel->tx_count || !channel->rx_count) return -EINVAL; + if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) { + en_err(priv, "Minimum %d tx channels required with XDP on\n", + priv->xdp_ring_num / MLX4_EN_NUM_UP + 1); + return -EINVAL; + } + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1751,7 +1756,8 @@ static int mlx4_en_set_channels(struct net_device *dev, mlx4_en_safe_replace_resources(priv, tmp); - netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); if (dev->num_tc) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8359e9e51b3b..4198e9bf89d0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -31,6 +31,7 @@ * */ +#include <linux/bpf.h> #include <linux/etherdevice.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -67,6 +68,17 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) offset += priv->num_tx_rings_p_up; } +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + if (up) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + priv->cee_params.dcb_cfg.pfc_state = false; + } + } +#endif /* CONFIG_MLX4_EN_DCB */ + return 0; } @@ -1201,8 +1213,8 @@ static void mlx4_en_netpoll(struct net_device *dev) struct mlx4_en_cq *cq; int i; - for (i = 0; i < priv->rx_ring_num; i++) { - cq = priv->rx_cq[i]; + for (i = 0; i < priv->tx_ring_num; i++) { + cq = priv->tx_cq[i]; napi_schedule(&cq->napi); } } @@ -1510,6 +1522,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); } +static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, + int tx_ring_idx) +{ + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx]; + int rr_index; + + rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx; + if (rr_index >= 0) { + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, + "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n", + tx_ring_idx, rr_index); + } else { + tx_ring->recycle_ring = NULL; + } +} + int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1632,6 +1662,8 @@ int mlx4_en_start_port(struct net_device *dev) } tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + mlx4_en_init_recycle_ring(priv, i); + /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); @@ -1696,10 +1728,9 @@ int mlx4_en_start_port(struct net_device *dev) /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); -#ifdef CONFIG_MLX4_EN_VXLAN if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - vxlan_get_rx_port(dev); -#endif + udp_tunnel_get_rx_info(dev); + priv->port_up = true; netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -2177,6 +2208,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } + if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { + en_err(priv, "MTU size:%d requires frags but XDP running\n", + new_mtu); + return -EOPNOTSUPP; + } dev->mtu = new_mtu; if (netif_running(dev)) { @@ -2434,7 +2470,6 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev, return 0; } -#ifdef CONFIG_MLX4_EN_VXLAN static void mlx4_en_add_vxlan_offloads(struct work_struct *work) { int ret; @@ -2484,15 +2519,19 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) } static void mlx4_en_add_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (ti->sa_family != AF_INET) return; - if (sa_family == AF_INET6) + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2507,15 +2546,19 @@ static void mlx4_en_add_vxlan_port(struct net_device *dev, } static void mlx4_en_del_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) return; - if (sa_family == AF_INET6) + if (ti->sa_family != AF_INET) + return; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2550,7 +2593,6 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, return features; } -#endif static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) { @@ -2579,6 +2621,103 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m return err; } +static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct bpf_prog *old_prog; + int xdp_ring_num; + int port_up = 0; + int err; + int i; + + xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0; + + /* No need to reconfigure buffers when simply swapping the + * program for a new one. + */ + if (priv->xdp_ring_num == xdp_ring_num) { + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + for (i = 0; i < priv->rx_ring_num; i++) { + /* This xchg is paired with READ_ONCE in the fastpath */ + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + return 0; + } + + if (priv->num_frags > 1) { + en_err(priv, "Cannot set XDP if MTU requires multiple frags\n"); + return -EOPNOTSUPP; + } + + if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) { + en_err(priv, + "Minimum %d tx channels required to run XDP\n", + (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP); + return -EINVAL; + } + + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + priv->xdp_ring_num = xdp_ring_num; + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); + + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port %d for XDP change\n", + priv->port); + queue_work(mdev->workqueue, &priv->watchdog_task); + } + } + + mutex_unlock(&mdev->state_lock); + return 0; +} + +static bool mlx4_xdp_attached(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return !!priv->xdp_ring_num; +} + +static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx4_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx4_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2603,12 +2742,11 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2641,12 +2779,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; struct mlx4_en_bond { @@ -2911,6 +3048,9 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_priv *priv; int i; int err; +#ifdef CONFIG_MLX4_EN_DCB + struct tc_configuration *tc; +#endif dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), MAX_TX_RINGS, MAX_RX_RINGS); @@ -2936,10 +3076,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); -#ifdef CONFIG_MLX4_EN_VXLAN INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); -#endif #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); @@ -2979,6 +3117,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { + priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + priv->flags |= MLX4_EN_DCB_ENABLED; + priv->cee_params.dcb_cfg.pfc_state = false; + + for (i = 0; i < MLX4_EN_NUM_UP; i++) { + tc = &priv->cee_params.dcb_cfg.tc_config[i]; + tc->dcb_pfc = pfc_disabled; + } + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 99b5407f2278..2040dad8611d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -32,6 +32,7 @@ */ #include <net/busy_poll.h> +#include <linux/bpf.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -57,7 +58,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct page *page; dma_addr_t dma; - for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) { + for (order = frag_info->order; ;) { gfp_t gfp = _gfp; if (order) @@ -70,7 +71,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (dma_mapping_error(priv->ddev, dma)) { put_page(page); return -ENOMEM; @@ -124,7 +125,8 @@ out: while (i--) { if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, + priv->frag_info[i].dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc[i].page_size / @@ -145,7 +147,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (frags[i].page) put_page(frags[i].page); @@ -176,7 +178,8 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, + priv->frag_info[i].dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc->page_size / @@ -201,7 +204,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, frag_info->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < page_alloc->page_size) { put_page(page_alloc->page); @@ -244,6 +247,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); + if (ring->page_cache.index > 0) { + frags[0] = ring->page_cache.buf[--ring->page_cache.index]; + rx_desc->data[0].addr = cpu_to_be64(frags[0].dma); + return 0; + } + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } @@ -502,13 +511,35 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } +/* When the rx ring is running in page-per-packet mode, a released frame can go + * directly into a small cache, to avoid unmapping or touching the page + * allocator. In bpf prog performance scenarios, buffers are either forwarded + * or dropped, never converted to skbs, so every page can come directly from + * this cache when it is sized to be a multiple of the napi budget. + */ +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame) +{ + struct mlx4_en_page_cache *cache = &ring->page_cache; + + if (cache->index >= MLX4_EN_CACHE_SIZE) + return false; + + cache->buf[cache->index++] = *frame; + return true; +} + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; + struct bpf_prog *old_prog; + old_prog = READ_ONCE(ring->xdp_prog); + if (old_prog) + bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; @@ -519,6 +550,16 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { + int i; + + for (i = 0; i < ring->page_cache.index; i++) { + struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; + + dma_unmap_page(priv->ddev, frame->dma, frame->page_size, + priv->frag_info[0].dma_dir); + put_page(frame->page); + } + ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -740,7 +781,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; + struct bpf_prog *xdp_prog; + int doorbell_pending; struct sk_buff *skb; + int tx_index; int index; int nr; unsigned int length; @@ -756,6 +800,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; + xdp_prog = READ_ONCE(ring->xdp_prog); + doorbell_pending = 0; + tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ @@ -832,6 +880,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + /* A bpf program gets first chance to drop the packet. It may + * read bytes but not past the end of the frag. + */ + if (xdp_prog) { + struct xdp_buff xdp; + dma_addr_t dma; + u32 act; + + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_for_cpu(priv->ddev, dma, + priv->frag_info[0].frag_size, + DMA_FROM_DEVICE); + + xdp.data = page_address(frags[0].page) + + frags[0].page_offset; + xdp.data_end = xdp.data + length; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + if (!mlx4_en_xmit_frame(frags, dev, + length, tx_index, + &doorbell_pending)) + goto consumed; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + if (mlx4_en_rx_recycle(ring, frags)) + goto consumed; + goto next; + } + } + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { @@ -983,6 +1068,7 @@ next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); +consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; @@ -991,6 +1077,9 @@ next: } out: + if (doorbell_pending) + mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -1058,22 +1147,35 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { + enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE; struct mlx4_en_priv *priv = netdev_priv(dev); - /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple - * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). - */ - int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); + int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); + int order = MLX4_EN_ALLOC_PREFER_ORDER; + u32 align = SMP_CACHE_BYTES; int buf_size = 0; int i = 0; + /* bpf requires buffers to be set up as 1 packet per page. + * This only works when num_frags == 1. + */ + if (priv->xdp_ring_num) { + dma_dir = PCI_DMA_BIDIRECTIONAL; + /* This will gain efficient xdp frame recycling at the expense + * of more costly truesize accounting + */ + align = PAGE_SIZE; + order = 0; + } + while (buf_size < eff_mtu) { + priv->frag_info[i].order = order; priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); + ALIGN(priv->frag_info[i].frag_size, align); + priv->frag_info[i].dma_dir = dma_dir; buf_size += priv->frag_info[i].frag_size; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 76aa4d27183c..9df87ca0515a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->last_nr_txbb = 1; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8); @@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, - int napi_mode) +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + .page_offset = 0, + .page_size = PAGE_SIZE, + }; + + if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->frag_info[0].dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { @@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } while (ring->cons != ring->prod) { - ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0, 0 /* Non-NAPI caller */); @@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = mlx4_en_free_tx_desc( + last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & ring->size), timestamp, napi_budget); @@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + return done < budget; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this stopped, and ring is not full. @@ -631,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb, static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, const struct sk_buff *skb, const struct skb_shared_info *shinfo, - int real_size, u16 *vlan_tag, - int tx_ind, void *fragptr) + void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -700,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src, __iowrite64_copy(dst, src, bytecnt / 8); } +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. + */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + ring->doorbell_qpn, + ring->bf.uar->map + MLX4_SEND_DOORBELL); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; @@ -715,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_tag = 0; u16 vlan_proto = 0; int i_frag; int lso_header_size; @@ -725,6 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u32 ring_cons; + bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; @@ -749,9 +829,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } + bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { - vlan_tag = skb_vlan_tag_get(skb); + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -771,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; + bf_ok = false; } /* Save skb in tx_info ring */ @@ -907,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) - build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag, - tx_ind, fragptr); + build_inline_wqe(tx_desc, skb, shinfo, fragptr); if (skb->encapsulation) { union { @@ -946,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !skb_vlan_tag_present(skb) && send_doorbell) { - tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | - cpu_to_be32(real_size); - - op_own |= htonl((bf_index & 0xffff) << 8); - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - - wmb(); + bf_ok &= desc_size <= MAX_BF && send_doorbell; - mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, - desc_size); - - wmb(); - - ring->bf.offset ^= ring->bf.buf_size; - } else { - tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - if (vlan_proto == ETH_P_8021AD) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; - else if (vlan_proto == ETH_P_8021Q) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; - else - tx_desc->ctrl.ins_vlan = 0; - - tx_desc->ctrl.fence_size = real_size; + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - if (send_doorbell) { - wmb(); - /* Since there is no iowrite*_native() that writes the - * value as is, without byteswapping - using the one - * the doesn't do byteswapping in the relevant arch - * endianness. - */ -#if defined(__LITTLE_ENDIAN) - iowrite32( -#else - iowrite32be( -#endif - ring->doorbell_qpn, - ring->bf.uar->map + MLX4_SEND_DOORBELL); - } else { - ring->xmit_more++; - } - } + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); if (unlikely(stop_queue)) { /* If queue was emptied after the if (stop_queue) , and before @@ -1034,3 +1077,106 @@ tx_drop: return NETDEV_TX_OK; } +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + int index, bf_index; + bool send_doorbell; + int nr_txbb = 1; + bool stop_queue; + dma_addr_t dma; + int real_size; + __be32 op_own; + u32 ring_cons; + bool bf_ok; + + BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, + "mlx4_en_xmit_frame requires minimum size tx desc"); + + ring = priv->tx_ring[tx_ind]; + + if (!priv->port_up) + goto tx_drop; + + if (mlx4_en_is_tx_ring_full(ring)) + goto tx_drop; + + /* fetch ring->cons far ahead before needing it to avoid stall */ + ring_cons = READ_ONCE(ring->cons); + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + bf_ok = ring->bf_enabled; + + /* Track current inflight packets for performance analysis */ + AVG_PERF_COUNTER(priv->pstats.inflight_avg, + (u32)(ring->prod - ring_cons - 1)); + + bf_index = ring->prod; + tx_desc = ring->buf + index * TXBB_SIZE; + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->map0_byte_count = length; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + + data->addr = cpu_to_be64(dma); + data->lkey = ring->mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + ring->packets++; + ring->bytes += tx_info->nr_bytes; + AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); + + ring->prod += nr_txbb; + + stop_queue = mlx4_en_is_tx_ring_full(ring); + send_doorbell = stop_queue || + *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; + bf_ok &= send_doorbell; + + real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, + op_own, bf_ok, send_doorbell); + *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + + return NETDEV_TX_OK; + +tx_drop: + ring->tx_dropped++; + return NETDEV_TX_BUSY; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e97094598b2d..f4497cf4d06d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1128,6 +1128,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c port_cap->max_pkeys = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); port_cap->max_vl = field & 0xf; + port_cap->max_tc_eth = field >> 4; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); port_cap->log_max_macs = field & 0xf; port_cap->log_max_vlans = field >> 4; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 7ea258af636a..cdbd76f10ced 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -53,6 +53,7 @@ struct mlx4_port_cap { int ib_mtu; int max_port_width; int max_vl; + int max_tc_eth; int max_gids; int max_pkeys; u64 def_mac; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index dec77d6f0ac9..0e8b7c44931f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) if (enable) { dev->flags |= MLX4_FLAG_BONDED; } else { - ret = mlx4_virt2phy_port_map(dev, 1, 2); + ret = mlx4_virt2phy_port_map(dev, 1, 2); if (ret) { mlx4_err(dev, "Fail to reset port map\n"); return ret; @@ -218,6 +218,9 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)) + return; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 546fab0ecc3b..75dd2e3d3059 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -292,6 +292,7 @@ static int _mlx4_dev_port(struct mlx4_dev *dev, int port, dev->caps.pkey_table_len[port] = port_cap->max_pkeys; dev->caps.port_width_cap[port] = port_cap->max_port_width; dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; + dev->caps.max_tc_eth = port_cap->max_tc_eth; dev->caps.def_mac[port] = port_cap->def_mac; dev->caps.supported_type[port] = port_cap->supported_port_types; dev->caps.suggested_type[port] = port_cap->suggested_type; @@ -2599,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); - return err; + return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index f2d0920018a5..94b891c118c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; @@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; } } } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 13d297ee34bb..2c2913dcae98 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,7 @@ enum { MLX4_EN_NUM_UP) #define MLX4_EN_DEFAULT_TX_WORK 256 +#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -164,6 +165,10 @@ enum { #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 +/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). + */ +#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN)) #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 @@ -215,7 +220,10 @@ enum cq_type { struct mlx4_en_tx_info { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct page *page; + }; dma_addr_t map0_dma; u32 map0_byte_count; u32 nr_txbb; @@ -255,6 +263,14 @@ struct mlx4_en_rx_alloc { u32 page_size; }; +#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) +struct mlx4_en_page_cache { + u32 index; + struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; +}; + +struct mlx4_en_priv; + struct mlx4_en_tx_ring { /* cache line used and dirtied in tx completion * (mlx4_en_free_tx_buf()) @@ -288,6 +304,11 @@ struct mlx4_en_tx_ring { __be32 mr_key; void *buf; struct mlx4_en_tx_info *tx_info; + struct mlx4_en_rx_ring *recycle_ring; + u32 (*free_tx_desc)(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, + u64 timestamp, int napi_mode); u8 *bounce_buf; struct mlx4_qp_context context; int qpn; @@ -319,6 +340,8 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; + struct bpf_prog *xdp_prog; + struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -440,7 +463,9 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; - u16 frag_stride; + u32 frag_stride; + enum dma_data_direction dma_dir; + int order; }; #ifdef CONFIG_MLX4_EN_DCB @@ -450,6 +475,27 @@ struct mlx4_en_frag_info { #define MLX4_EN_TC_ETS 7 +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +struct tc_configuration { + enum dcb_pfc_type dcb_pfc; +}; + +struct mlx4_en_cee_config { + bool pfc_state; + struct tc_configuration tc_config[MLX4_EN_NUM_UP]; +}; + +struct mlx4_en_cee_params { + u8 dcbx_cap; + struct mlx4_en_cee_config dcb_cfg; +}; + #endif struct ethtool_flow_id { @@ -469,6 +515,9 @@ enum { MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), +#ifdef CONFIG_MLX4_EN_DCB + MLX4_EN_FLAG_DCB_ENABLED = (1 << 6), +#endif }; #define PORT_BEACON_MAX_LIMIT (65535) @@ -536,6 +585,7 @@ struct mlx4_en_priv { struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 num_frags; u16 log_rx_info; + int xdp_ring_num; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; @@ -547,10 +597,8 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; -#ifdef CONFIG_MLX4_EN_VXLAN struct work_struct vxlan_add_task; struct work_struct vxlan_del_task; -#endif struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_counter_stats pf_stats; @@ -572,9 +620,11 @@ struct mlx4_en_priv { u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB +#define MLX4_EN_DCB_ENABLED 0x3 struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS]; + struct mlx4_en_cee_params cee_params; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; @@ -644,6 +694,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending); +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, @@ -672,6 +728,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 93195191f45b..395b5463cfd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) offset, order); return; } - __mlx4_free_mtt_range(dev, offset, order); + __mlx4_free_mtt_range(dev, offset, order); } void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 087b23b320cb..3d2095e5c61c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 #define MLX4_IGNORE_FCS_MASK 0x1 +#define MLNX4_TX_MAX_NUMBER 8 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { @@ -2015,3 +2016,14 @@ out: return ret; } EXPORT_SYMBOL(mlx4_get_module_info); + +int mlx4_max_tc(struct mlx4_dev *dev) +{ + u8 num_tc = dev->caps.max_tc_eth; + + if (!num_tc) + num_tc = MLNX4_TX_MAX_NUMBER; + + return num_tc; +} +EXPORT_SYMBOL(mlx4_max_tc); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cd9b2b28df88..8b81114bdc72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: - index = get_param_l(&in_param); - id = index & mpt_mask(dev); - err = mr_res_start_move_to(dev, slave, id, - RES_MPT_RESERVED, &mpt); - if (err) - return err; - - __mlx4_mpt_free_icm(dev, mpt->key); - res_end_move(dev, slave, RES_MPT, id); + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_RESERVED, &mpt); + if (err) return err; + + __mlx4_mpt_free_icm(dev, mpt->key); + res_end_move(dev, slave, RES_MPT, id); break; default: err = -EINVAL; @@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { - mlx4_warn(dev, - "Src check LB for slave %d isn't supported\n", - slave); + mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", + slave); return -ENOTSUPP; } |