From b8cce68bf1f1b773ac1a535707f968512b3c1e5f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Sun, 29 Oct 2017 22:40:56 -0500 Subject: net/mlx5: Loop over temp list to release delay events list_splice_init() reinitializes waiting_events_list after splicing it onto the temp list, leaving waiting_events_list empty; therefore we should loop over the temp list to fire the events. Fixes: 4ca637a20a52 ("net/mlx5: Delay events till mlx5 interface's add complete for pci resume") Signed-off-by: Huy Nguyen Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index fc281712869b..17b723218b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -93,7 +93,7 @@ static void delayed_event_release(struct mlx5_device_context *dev_ctx, list_splice_init(&priv->waiting_events_list, &temp); if (!dev_ctx->context) goto out; - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) + list_for_each_entry_safe(de, n, &temp, list) dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); out: -- cgit v1.2.3 From d2aa060d40fa060e963f9a356d43481e43ba3dac Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 26 Sep 2017 15:11:56 -0500 Subject: net/mlx5: Cancel health poll before sending panic teardown command After the panic teardown firmware command, health_care detects the error on the PCI bus and calls mlx5_pci_err_detected. This health_care flow is no longer needed because the panic teardown firmware command will bring down the PCI bus communication with the HCA. The solution is to cancel the health care timer and its pending workqueue request before sending the panic teardown firmware command. 
Kernel trace: mlx5_core 0033:01:00.0: Shutdown was called mlx5_core 0033:01:00.0: health_care:154:(pid 9304): handling bad device here mlx5_core 0033:01:00.0: mlx5_handle_bad_state:114:(pid 9304): NIC state 1 mlx5_core 0033:01:00.0: mlx5_pci_err_detected was called mlx5_core 0033:01:00.0: mlx5_enter_error_state:96:(pid 9304): start mlx5_3:mlx5_ib_event:3061:(pid 9304): warning: event on port 0 mlx5_core 0033:01:00.0: mlx5_enter_error_state:104:(pid 9304): end Unable to handle kernel paging request for data at address 0x0000003f Faulting instruction address: 0xc0080000434b8c80 Fixes: 8812c24d28f4 ('net/mlx5: Add fast unload support in shutdown flow') Signed-off-by: Huy Nguyen Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0d2c8dcd6eae..06562c9a6b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1482,9 +1482,16 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) return -EAGAIN; } + /* Panic tear down fw command will stop the PCI bus communication + * with the HCA, so the health poll is no longer needed. + */ + mlx5_drain_health_wq(dev); + mlx5_stop_health_poll(dev); + ret = mlx5_cmd_force_teardown_hca(dev); if (ret) { mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); return ret; } -- cgit v1.2.3 From 2a8d6065e7b90ad9d5540650944d802b0f86bdfe Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 31 Oct 2017 15:34:00 -0700 Subject: net/mlx5e: Fix napi poll with zero budget napi->poll can be called with budget 0, e.g. in netpoll scenarios where the caller only wants to poll TX rings (poll_one_napi@net/core/netpoll.c). 
The commit below changed RX polling from a "while" loop to "do {} while", which caused the initial budget to be ignored and at least one RX packet to be handled even when the budget is 0. This fixes the following warning: [ 2852.049194] mlx5e_napi_poll+0x0/0x260 [mlx5_core] exceeded budget in poll [ 2852.049195] ------------[ cut here ]------------ [ 2852.049195] WARNING: CPU: 0 PID: 25691 at net/core/netpoll.c:171 netpoll_poll_dev+0x18a/0x1a0 Fixes: 4b7dfc992514 ("net/mlx5e: Early-return on empty completion queues") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan Reported-by: Martin KaFai Lau Tested-by: Martin KaFai Lau Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index e906b754415c..ab92298eafc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,7 +49,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); bool busy = false; - int work_done; + int work_done = 0; int i; for (i = 0; i < c->num_tc; i++) @@ -58,15 +58,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (c->xdp) busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); - work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); - busy |= work_done == budget; + if (likely(budget)) { /* budget=0 means: don't poll rx rings */ + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; + } busy |= c->rq.post_wqes(&c->rq); if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; - if (work_done == budget) + if (budget && work_done == budget) work_done--; } -- cgit v1.2.3 From 2e50b2619538ea0224c037f6fa746023089e0654 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Sun, 15 Oct 2017 17:30:59 +0300 Subject: 
net/mlx5e: Set page to null in case dma mapping fails Currently, when dma mapping fails, put_page is called, but the page is not set to null. Later, in the page_reuse treatment in mlx5e_free_rx_descs(), mlx5e_page_release() is called for the second time, improperly doing dma_unmap (for a non-mapped address) and an extra put_page. Prevent this by nullifying the page pointer when dma_map fails. Fixes: accd58833237 ("net/mlx5e: Introduce RX Page-Reuse") Signed-off-by: Inbar Karmy Reviewed-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 15a1687483cc..91b1b0938931 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -215,22 +215,20 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - struct page *page; - if (mlx5e_rx_cache_get(rq, dma_info)) return 0; - page = dev_alloc_pages(rq->buff.page_order); - if (unlikely(!page)) + dma_info->page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!dma_info->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, page, 0, + dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(page); + put_page(dma_info->page); + dma_info->page = NULL; return -ENOMEM; } - dma_info->page = page; return 0; } -- cgit v1.2.3 From d1c61e6d79ea0d4d53dc18bcd2db30ef2d99cfa7 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 12 Jan 2017 17:11:45 +0200 Subject: net/mlx5e: Increase Striding RQ minimum size limit to 4 multi-packet WQEs This is to prevent the case of working with a single MPWQE (1 WQE is 
always reserved as RQ is linked-list). When the WQE is fully consumed, HW should still have available buffer in order not to drop packets. Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Signed-off-by: Eugenia Emantayev Reviewed-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc13d3dbd366..13b5ef9d8703 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -67,7 +67,7 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd -#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 -- cgit v1.2.3