diff options
Diffstat (limited to 'drivers/net/ethernet/cavium/thunder')
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nic.h | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nic_main.c | 64 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c | 29 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_main.c | 391 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 397 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 32 | ||||
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/q_struct.h | 10 |
7 files changed, 711 insertions, 224 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 2269ff562d95..4a02e618e318 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -252,12 +252,14 @@ struct nicvf_drv_stats { u64 tx_csum_overflow; /* driver debug stats */ - u64 rcv_buffer_alloc_failures; u64 tx_tso; u64 tx_timeout; u64 txq_stop; u64 txq_wake; + u64 rcv_buffer_alloc_failures; + u64 page_alloc; + struct u64_stats_sync syncp; }; @@ -266,9 +268,9 @@ struct nicvf { struct net_device *netdev; struct pci_dev *pdev; void __iomem *reg_base; + struct bpf_prog *xdp_prog; #define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; - struct nicvf_cq_poll *napi[8]; void *iommu_domain; u8 vf_id; u8 sqs_id; @@ -294,6 +296,7 @@ struct nicvf { /* Queue count */ u8 rx_queues; u8 tx_queues; + u8 xdp_tx_queues; u8 max_queues; u8 node; @@ -318,10 +321,11 @@ struct nicvf { struct nicvf_drv_stats __percpu *drv_stats; struct bgx_stats bgx_stats; + /* Napi */ + struct nicvf_cq_poll *napi[8]; + /* MSI-X */ - bool msix_enabled; u8 num_vec; - struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 767234e2e8f9..fb770b0182d3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -65,9 +65,7 @@ struct nicpf { bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; /* MSI-X */ - bool msix_enabled; u8 num_vec; - struct msix_entry *msix_entries; bool irq_allocated[NIC_PF_MSIX_VECTORS]; char irq_name[NIC_PF_MSIX_VECTORS][20]; }; @@ -1088,7 +1086,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) u64 intr; u8 vf, vf_per_mbx_reg = 64; - if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector) + if (irq == 
pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0)) mbx = 0; else mbx = 1; @@ -1107,51 +1105,13 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) return IRQ_HANDLED; } -static int nic_enable_msix(struct nicpf *nic) -{ - int i, ret; - - nic->num_vec = pci_msix_vec_count(nic->pdev); - - nic->msix_entries = kmalloc_array(nic->num_vec, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!nic->msix_entries) - return -ENOMEM; - - for (i = 0; i < nic->num_vec; i++) - nic->msix_entries[i].entry = i; - - ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); - if (ret) { - dev_err(&nic->pdev->dev, - "Request for #%d msix vectors failed, returned %d\n", - nic->num_vec, ret); - kfree(nic->msix_entries); - return ret; - } - - nic->msix_enabled = 1; - return 0; -} - -static void nic_disable_msix(struct nicpf *nic) -{ - if (nic->msix_enabled) { - pci_disable_msix(nic->pdev); - kfree(nic->msix_entries); - nic->msix_enabled = 0; - nic->num_vec = 0; - } -} - static void nic_free_all_interrupts(struct nicpf *nic) { int irq; for (irq = 0; irq < nic->num_vec; irq++) { if (nic->irq_allocated[irq]) - free_irq(nic->msix_entries[irq].vector, nic); + free_irq(pci_irq_vector(nic->pdev, irq), nic); nic->irq_allocated[irq] = false; } } @@ -1159,18 +1119,24 @@ static void nic_free_all_interrupts(struct nicpf *nic) static int nic_register_interrupts(struct nicpf *nic) { int i, ret; + nic->num_vec = pci_msix_vec_count(nic->pdev); /* Enable MSI-X */ - ret = nic_enable_msix(nic); - if (ret) - return ret; + ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&nic->pdev->dev, + "Request for #%d msix vectors failed, returned %d\n", + nic->num_vec, ret); + return 1; + } /* Register mailbox interrupt handler */ for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) { sprintf(nic->irq_name[i], "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - ret = request_irq(nic->msix_entries[i].vector, + ret = 
request_irq(pci_irq_vector(nic->pdev, i), nic_mbx_intr_handler, 0, nic->irq_name[i], nic); if (ret) @@ -1186,14 +1152,16 @@ static int nic_register_interrupts(struct nicpf *nic) fail: dev_err(&nic->pdev->dev, "Request irq failed\n"); nic_free_all_interrupts(nic); - nic_disable_msix(nic); + pci_free_irq_vectors(nic->pdev); + nic->num_vec = 0; return ret; } static void nic_unregister_interrupts(struct nicpf *nic) { nic_free_all_interrupts(nic); - nic_disable_msix(nic); + pci_free_irq_vectors(nic->pdev); + nic->num_vec = 0; } static int nic_num_sqs_en(struct nicpf *nic, int vf_en) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 02a986cdbb39..b9ece9cbf98b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -100,11 +100,12 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(tx_csum_overlap), NICVF_DRV_STAT(tx_csum_overflow), - NICVF_DRV_STAT(rcv_buffer_alloc_failures), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), + NICVF_DRV_STAT(page_alloc), }; static const struct nicvf_stat nicvf_queue_stats[] = { @@ -720,7 +721,7 @@ static int nicvf_set_channels(struct net_device *dev, struct nicvf *nic = netdev_priv(dev); int err = 0; bool if_up = netif_running(dev); - int cqcount; + u8 cqcount, txq_count; if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -729,10 +730,26 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > nic->max_queues) return -EINVAL; + if (nic->xdp_prog && + ((channel->tx_count + channel->rx_count) > nic->max_queues)) { + netdev_err(nic->netdev, + "XDP mode, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -EINVAL; + } + if (if_up) nicvf_stop(dev); - cqcount = max(channel->rx_count, channel->tx_count); + nic->rx_queues = channel->rx_count; + 
nic->tx_queues = channel->tx_count; + if (!nic->xdp_prog) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = channel->rx_count; + + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cqcount = max(nic->rx_queues, txq_count); if (cqcount > MAX_CMP_QUEUES_PER_QS) { nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS); @@ -741,12 +758,10 @@ static int nicvf_set_channels(struct net_device *dev, nic->sqs_count = 0; } - nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS); - nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS); + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); - nic->rx_queues = channel->rx_count; - nic->tx_queues = channel->tx_count; err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues); if (err) return err; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 24017588f531..d6477af88085 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -17,6 +17,9 @@ #include <linux/prefetch.h> #include <linux/irq.h> #include <linux/iommu.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> #include "nic_reg.h" #include "nic.h" @@ -397,8 +400,10 @@ static void nicvf_request_sqs(struct nicvf *nic) if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; - if (nic->tx_queues > MAX_SND_QUEUES_PER_QS) - tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS; + + tx_queues = nic->tx_queues + nic->xdp_tx_queues; + if (tx_queues > MAX_SND_QUEUES_PER_QS) + tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS; /* Set no of Rx/Tx queues in each of the SQsets */ for (sqs = 0; sqs < nic->sqs_count; sqs++) { @@ -496,12 +501,99 @@ static int nicvf_init_resources(struct nicvf *nic) return 0; } 
+static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, + struct cqe_rx_t *cqe_rx, struct snd_queue *sq, + struct sk_buff **skb) +{ + struct xdp_buff xdp; + struct page *page; + u32 action; + u16 len, offset = 0; + u64 dma_addr, cpu_addr; + void *orig_data; + + /* Retrieve packet buffer's DMA address and length */ + len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); + dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64)))); + + cpu_addr = nicvf_iova_to_phys(nic, dma_addr); + if (!cpu_addr) + return false; + cpu_addr = (u64)phys_to_virt(cpu_addr); + page = virt_to_page((void *)cpu_addr); + + xdp.data_hard_start = page_address(page); + xdp.data = (void *)cpu_addr; + xdp.data_end = xdp.data + len; + orig_data = xdp.data; + + rcu_read_lock(); + action = bpf_prog_run_xdp(prog, &xdp); + rcu_read_unlock(); + + /* Check if XDP program has changed headers */ + if (orig_data != xdp.data) { + len = xdp.data_end - xdp.data; + offset = orig_data - xdp.data; + dma_addr -= offset; + } + + switch (action) { + case XDP_PASS: + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + + /* Build SKB and pass on packet to network stack */ + *skb = build_skb(xdp.data, + RCV_FRAG_LEN - cqe_rx->align_pad + offset); + if (!*skb) + put_page(page); + else + skb_put(*skb, len); + return false; + case XDP_TX: + nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); + return true; + default: + bpf_warn_invalid_xdp_action(action); + case XDP_ABORTED: + trace_xdp_exception(nic->netdev, prog, action); + case XDP_DROP: + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. 
+ */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + put_page(page); + return true; + } + return false; +} + static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cqe_send_t *cqe_tx, - int cqe_type, int budget, + int budget, int *subdesc_cnt, unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; + struct page *page; struct nicvf *nic = netdev_priv(netdev); struct snd_queue *sq; struct sq_hdr_subdesc *hdr; @@ -513,12 +605,26 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) return; - netdev_dbg(nic->netdev, - "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n", - __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, - cqe_tx->sqe_ptr, hdr->subdesc_cnt); + /* Check for errors */ + if (cqe_tx->send_status) + nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); + + /* Is this a XDP designated Tx queue */ + if (sq->is_xdp) { + page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; + /* Check if it's recycled page or else unmap DMA mapping */ + if (page && (page_ref_count(page) == 1)) + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, + hdr->subdesc_cnt); + + /* Release page reference for recycling */ + if (page) + put_page(page); + sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; + *subdesc_cnt += hdr->subdesc_cnt + 1; + return; + } - nicvf_check_cqe_tx_errs(nic, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -528,12 +634,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, tso_sqe->subdesc_cnt); - nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + *subdesc_cnt += tso_sqe->subdesc_cnt + 1; } else { nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 
hdr->subdesc_cnt); } - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; prefetch(skb); (*tx_pkts)++; *tx_bytes += skb->len; @@ -544,7 +650,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, * a SKB attached, so just free SQEs here. */ if (!nic->hw_tso) - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; } } @@ -578,9 +684,9 @@ static inline void nicvf_set_rxhash(struct net_device *netdev, static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, - struct cqe_rx_t *cqe_rx) + struct cqe_rx_t *cqe_rx, struct snd_queue *sq) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); struct nicvf *snic = nic; int err = 0; @@ -595,16 +701,25 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, } /* Check for errors */ - err = nicvf_check_cqe_rx_errs(nic, cqe_rx); - if (err && !cqe_rx->rb_cnt) - return; + if (cqe_rx->err_level || cqe_rx->err_opcode) { + err = nicvf_check_cqe_rx_errs(nic, cqe_rx); + if (err && !cqe_rx->rb_cnt) + return; + } - skb = nicvf_get_rcv_skb(snic, cqe_rx); - if (!skb) { - netdev_dbg(nic->netdev, "Packet not received\n"); - return; + /* For XDP, ignore pkts spanning multiple pages */ + if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { + /* Packet consumed by XDP */ + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb)) + return; + } else { + skb = nicvf_get_rcv_skb(snic, cqe_rx, + nic->xdp_prog ? 
true : false); } + if (!skb) + return; + if (netif_msg_pktdata(nic)) { netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, skb, skb->len); @@ -646,13 +761,14 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, { int processed_cqe, work_done = 0, tx_done = 0; int cqe_count, cqe_head; + int subdesc_cnt = 0; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; - struct snd_queue *sq; - unsigned int tx_pkts = 0, tx_bytes = 0; + struct snd_queue *sq = &qs->sq[cq_idx]; + unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; spin_lock_bh(&cq->lock); loop: @@ -667,8 +783,6 @@ loop: cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; cqe_head &= 0xFFFF; - netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", - __func__, cq_idx, cqe_count, cqe_head); while (processed_cqe < cqe_count) { /* Get the CQ descriptor */ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); @@ -682,17 +796,15 @@ loop: break; } - netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", - cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: - nicvf_rcv_pkt_handler(netdev, napi, cq_desc); + nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq); work_done++; break; case CQE_TYPE_SEND: - nicvf_snd_pkt_handler(netdev, - (void *)cq_desc, CQE_TYPE_SEND, - budget, &tx_pkts, &tx_bytes); + nicvf_snd_pkt_handler(netdev, (void *)cq_desc, + budget, &subdesc_cnt, + &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -704,9 +816,6 @@ loop: } processed_cqe++; } - netdev_dbg(nic->netdev, - "%s CQ%d processed_cqe %d work_done %d budget %d\n", - __func__, cq_idx, processed_cqe, work_done, budget); /* Ring doorbell to inform H/W to reuse processed CQEs */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, @@ -716,13 +825,26 @@ loop: goto loop; done: + /* Update SQ's descriptor free count */ + if (subdesc_cnt) + 
nicvf_put_sq_desc(sq, subdesc_cnt); + + txq_idx = nicvf_netdev_qidx(nic, cq_idx); + /* Handle XDP TX queues */ + if (nic->pnicvf->xdp_prog) { + if (txq_idx < nic->pnicvf->xdp_tx_queues) { + nicvf_xdp_sq_doorbell(nic, sq, cq_idx); + goto out; + } + nic = nic->pnicvf; + txq_idx -= nic->pnicvf->xdp_tx_queues; + } + /* Wakeup TXQ if its stopped earlier due to SQ full */ - sq = &nic->qs->sq[cq_idx]; if (tx_done || (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { netdev = nic->pnicvf->netdev; - txq = netdev_get_tx_queue(netdev, - nicvf_netdev_qidx(nic, cq_idx)); + txq = netdev_get_tx_queue(netdev, txq_idx); if (tx_pkts) netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); @@ -735,10 +857,11 @@ done: if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit queue wakeup SQ%d\n", - netdev->name, cq_idx); + netdev->name, txq_idx); } } +out: spin_unlock_bh(&cq->lock); return work_done; } @@ -882,38 +1005,9 @@ static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) return IRQ_HANDLED; } -static int nicvf_enable_msix(struct nicvf *nic) -{ - int ret, vec; - - nic->num_vec = NIC_VF_MSIX_VECTORS; - - for (vec = 0; vec < nic->num_vec; vec++) - nic->msix_entries[vec].entry = vec; - - ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); - if (ret) { - netdev_err(nic->netdev, - "Req for #%d msix vectors failed\n", nic->num_vec); - return 0; - } - nic->msix_enabled = 1; - return 1; -} - -static void nicvf_disable_msix(struct nicvf *nic) -{ - if (nic->msix_enabled) { - pci_disable_msix(nic->pdev); - nic->msix_enabled = 0; - nic->num_vec = 0; - } -} - static void nicvf_set_irq_affinity(struct nicvf *nic) { int vec, cpu; - int irqnum; for (vec = 0; vec < nic->num_vec; vec++) { if (!nic->irq_allocated[vec]) @@ -930,15 +1024,14 @@ static void nicvf_set_irq_affinity(struct nicvf *nic) cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), nic->affinity_mask[vec]); - irqnum = nic->msix_entries[vec].vector; - irq_set_affinity_hint(irqnum, 
nic->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), + nic->affinity_mask[vec]); } } static int nicvf_register_interrupts(struct nicvf *nic) { int irq, ret = 0; - int vector; for_each_cq_irq(irq) sprintf(nic->irq_name[irq], "%s-rxtx-%d", @@ -957,8 +1050,8 @@ static int nicvf_register_interrupts(struct nicvf *nic) /* Register CQ interrupts */ for (irq = 0; irq < nic->qs->cq_cnt; irq++) { - vector = nic->msix_entries[irq].vector; - ret = request_irq(vector, nicvf_intr_handler, + ret = request_irq(pci_irq_vector(nic->pdev, irq), + nicvf_intr_handler, 0, nic->irq_name[irq], nic->napi[irq]); if (ret) goto err; @@ -968,8 +1061,8 @@ static int nicvf_register_interrupts(struct nicvf *nic) /* Register RBDR interrupt */ for (irq = NICVF_INTR_ID_RBDR; irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { - vector = nic->msix_entries[irq].vector; - ret = request_irq(vector, nicvf_rbdr_intr_handler, + ret = request_irq(pci_irq_vector(nic->pdev, irq), + nicvf_rbdr_intr_handler, 0, nic->irq_name[irq], nic); if (ret) goto err; @@ -981,7 +1074,7 @@ static int nicvf_register_interrupts(struct nicvf *nic) nic->pnicvf->netdev->name, nic->sqs_mode ? 
(nic->sqs_id + 1) : 0); irq = NICVF_INTR_ID_QS_ERR; - ret = request_irq(nic->msix_entries[irq].vector, + ret = request_irq(pci_irq_vector(nic->pdev, irq), nicvf_qs_err_intr_handler, 0, nic->irq_name[irq], nic); if (ret) @@ -1001,6 +1094,7 @@ err: static void nicvf_unregister_interrupts(struct nicvf *nic) { + struct pci_dev *pdev = nic->pdev; int irq; /* Free registered interrupts */ @@ -1008,19 +1102,20 @@ static void nicvf_unregister_interrupts(struct nicvf *nic) if (!nic->irq_allocated[irq]) continue; - irq_set_affinity_hint(nic->msix_entries[irq].vector, NULL); + irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); free_cpumask_var(nic->affinity_mask[irq]); if (irq < NICVF_INTR_ID_SQ) - free_irq(nic->msix_entries[irq].vector, nic->napi[irq]); + free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); else - free_irq(nic->msix_entries[irq].vector, nic); + free_irq(pci_irq_vector(pdev, irq), nic); nic->irq_allocated[irq] = false; } /* Disable MSI-X */ - nicvf_disable_msix(nic); + pci_free_irq_vectors(pdev); + nic->num_vec = 0; } /* Initialize MSIX vectors and register MISC interrupt. 
@@ -1032,16 +1127,22 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) int irq = NICVF_INTR_ID_MISC; /* Return if mailbox interrupt is already registered */ - if (nic->msix_enabled) + if (nic->pdev->msix_enabled) return 0; /* Enable MSI-X */ - if (!nicvf_enable_msix(nic)) + nic->num_vec = pci_msix_vec_count(nic->pdev); + ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + netdev_err(nic->netdev, + "Req for #%d msix vectors failed\n", nic->num_vec); return 1; + } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); /* Register Misc interrupt */ - ret = request_irq(nic->msix_entries[irq].vector, + ret = request_irq(pci_irq_vector(nic->pdev, irq), nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); if (ret) @@ -1076,6 +1177,13 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } + /* In XDP case, initial HW tx queues are used for XDP, + * but stack's queue mapping starts at '0', so skip the + * Tx queues attached to Rx queues for XDP. 
+ */ + if (nic->xdp_prog) + qid += nic->xdp_tx_queues; + snic = nic; /* Get secondary Qset's SQ structure */ if (qid >= MAX_SND_QUEUES_PER_QS) { @@ -1164,7 +1272,7 @@ int nicvf_stop(struct net_device *netdev) /* Wait for pending IRQ handlers to finish */ for (irq = 0; irq < nic->num_vec; irq++) - synchronize_irq(nic->msix_entries[irq].vector); + synchronize_irq(pci_irq_vector(nic->pdev, irq)); tasklet_kill(&nic->rbdr_task); tasklet_kill(&nic->qs_err_task); @@ -1365,7 +1473,7 @@ static int nicvf_set_mac_address(struct net_device *netdev, void *p) memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - if (nic->msix_enabled) { + if (nic->pdev->msix_enabled) { if (nicvf_hw_set_mac_addr(nic, netdev)) return -EBUSY; } else { @@ -1553,6 +1661,114 @@ static int nicvf_set_features(struct net_device *netdev, return 0; } +static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) +{ + u8 cq_count, txq_count; + + /* Set XDP Tx queue count same as Rx queue count */ + if (!bpf_attached) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = nic->rx_queues; + + /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets + * needs to be allocated, check how many. 
+ */ + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cq_count = max(nic->rx_queues, txq_count); + if (cq_count > MAX_CMP_QUEUES_PER_QS) { + nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); + nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; + } else { + nic->sqs_count = 0; + } + + /* Set primary Qset's resources */ + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); + nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); + + /* Update stack */ + nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); +} + +static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) +{ + struct net_device *dev = nic->netdev; + bool if_up = netif_running(nic->netdev); + struct bpf_prog *old_prog; + bool bpf_attached = false; + + /* For now just support only the usual MTU sized frames */ + if (prog && (dev->mtu > 1500)) { + netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + dev->mtu); + return -EOPNOTSUPP; + } + + /* ALL SQs attached to CQs i.e same as RQs, are treated as + * XDP Tx queues and more Tx queues are allocated for + * network stack to send pkts out. + * + * No of Tx queues are either same as Rx queues or whatever + * is left in max no of queues possible. 
+ */ + if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { + netdev_warn(dev, + "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -ENOMEM; + } + + if (if_up) + nicvf_stop(nic->netdev); + + old_prog = xchg(&nic->xdp_prog, prog); + /* Detach old prog, if any */ + if (old_prog) + bpf_prog_put(old_prog); + + if (nic->xdp_prog) { + /* Attach BPF program */ + nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); + if (!IS_ERR(nic->xdp_prog)) + bpf_attached = true; + } + + /* Calculate Tx queues needed for XDP and network stack */ + nicvf_set_xdp_queues(nic, bpf_attached); + + if (if_up) { + /* Reinitialize interface, clean slate */ + nicvf_open(nic->netdev); + netif_trans_update(nic->netdev); + } + + return 0; +} + +static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +{ + struct nicvf *nic = netdev_priv(netdev); + + /* To avoid checks while retrieving buffer address from CQE_RX, + * do not support XDP for T88 pass1.x silicons which are anyway + * not in use widely. 
+ */ + if (pass1_silicon(nic->pdev)) + return -EOPNOTSUPP; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nicvf_xdp_setup(nic, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!nic->xdp_prog; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops nicvf_netdev_ops = { .ndo_open = nicvf_open, .ndo_stop = nicvf_stop, @@ -1563,6 +1779,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, + .ndo_xdp = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1665,8 +1882,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_unregister_interrupts; - netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_GRO | + netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_VLAN_CTAG_RX); netdev->hw_features |= NETIF_F_RXHASH; @@ -1674,7 +1892,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= netdev->hw_features; netdev->hw_features |= NETIF_F_LOOPBACK; - netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; + netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f13289f0d238..2b181762ad49 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -19,16 +19,8 @@ #include "q_struct.h" #include "nicvf_queues.h" -#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? 
PAGE_ALLOC_COSTLY_ORDER : 0) - -static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) -{ - /* Translation is installed only when IOMMU is present */ - if (nic->iommu_domain) - return iommu_iova_to_phys(nic->iommu_domain, dma_addr); - return dma_addr; -} - +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data); static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -90,46 +82,152 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) dmem->base = NULL; } -/* Allocate buffer for packet reception - * HW returns memory address where packet is DMA'ed but not a pointer - * into RBDR ring, so save buffer address at the start of fragment and - * align the start address to a cache aligned address +#define XDP_PAGE_REFCNT_REFILL 256 + +/* Allocate a new page or recycle one if possible + * + * We cannot optimize dma mapping here, since + * 1. It's only one RBDR ring for 8 Rx queues. + * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed + * and not idx into RBDR ring, so can't refer to saved info. + * 3. There are multiple receive buffers per page */ -static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, - u32 buf_len, u64 **rbuf) +static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, + struct rbdr *rbdr, gfp_t gfp) +{ + int ref_count; + struct page *page = NULL; + struct pgcache *pgcache, *next; + + /* Check if page is already allocated */ + pgcache = &rbdr->pgcache[rbdr->pgidx]; + page = pgcache->page; + /* Check if page can be recycled */ + if (page) { + ref_count = page_ref_count(page); + /* Check if this page has been used once i.e 'put_page' + * called after packet transmission i.e internal ref_count + * and page's ref_count are equal i.e page can be recycled. 
+ */ + if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) + pgcache->ref_count--; + else + page = NULL; + + /* In non-XDP mode, page's ref_count needs to be '1' for it + * to be recycled. + */ + if (!rbdr->is_xdp && (ref_count != 1)) + page = NULL; + } + + if (!page) { + page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0); + if (!page) + return NULL; + + this_cpu_inc(nic->pnicvf->drv_stats->page_alloc); + + /* Check for space */ + if (rbdr->pgalloc >= rbdr->pgcnt) { + /* Page can still be used */ + nic->rb_page = page; + return NULL; + } + + /* Save the page in page cache */ + pgcache->page = page; + pgcache->dma_addr = 0; + pgcache->ref_count = 0; + rbdr->pgalloc++; + } + + /* Take additional page references for recycling */ + if (rbdr->is_xdp) { + /* Since there is single RBDR (i.e single core doing + * page recycling) per 8 Rx queues, in XDP mode adjusting + * page references atomically is the biggest bottleneck, so + * take bunch of references at a time. + * + * So here, below reference counts defer by '1'. + */ + if (!pgcache->ref_count) { + pgcache->ref_count = XDP_PAGE_REFCNT_REFILL; + page_ref_add(page, XDP_PAGE_REFCNT_REFILL); + } + } else { + /* In non-XDP case, single 64K page is divided across multiple + * receive buffers, so cost of recycling is less anyway. + * So we can do with just one extra reference. 
+ */ + page_ref_add(page, 1); + } + + rbdr->pgidx++; + rbdr->pgidx &= (rbdr->pgcnt - 1); + + /* Prefetch refcount of next page in page cache */ + next = &rbdr->pgcache[rbdr->pgidx]; + page = next->page; + if (page) + prefetch(&page->_refcount); + + return pgcache; +} + +/* Allocate buffer for packet reception */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, + gfp_t gfp, u32 buf_len, u64 *rbuf) { - int order = NICVF_PAGE_ORDER; + struct pgcache *pgcache = NULL; - /* Check if request can be accomodated in previous allocated page */ - if (nic->rb_page && - ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) { + /* Check if request can be accomodated in previous allocated page. + * But in XDP mode only one buffer per page is permitted. + */ + if (!rbdr->is_xdp && nic->rb_page && + ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { nic->rb_pageref++; goto ret; } nicvf_get_page(nic); + nic->rb_page = NULL; - /* Allocate a new page */ - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - order); - if (!nic->rb_page) { + /* Get new page, either recycled or new one */ + pgcache = nicvf_alloc_page(nic, rbdr, gfp); + if (!pgcache && !nic->rb_page) { this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); return -ENOMEM; } + nic->rb_page_offset = 0; + + /* Reserve space for header modifications by BPF program */ + if (rbdr->is_xdp) + buf_len += XDP_PACKET_HEADROOM; + + /* Check if it's recycled */ + if (pgcache) + nic->rb_page = pgcache->page; ret: - /* HW will ensure data coherency, CPU sync not required */ - *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + if (rbdr->is_xdp && pgcache && pgcache->dma_addr) { + *rbuf = pgcache->dma_addr; + } else { + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, nic->rb_page_offset, buf_len, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC)); - if (dma_mapping_error(&nic->pdev->dev, 
(dma_addr_t)*rbuf)) { - if (!nic->rb_page_offset) - __free_pages(nic->rb_page, order); - nic->rb_page = NULL; - return -ENOMEM; + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, 0); + nic->rb_page = NULL; + return -ENOMEM; + } + if (pgcache) + pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM; + nic->rb_page_offset += buf_len; } - nic->rb_page_offset += buf_len; return 0; } @@ -159,7 +257,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len, int buf_size) { int idx; - u64 *rbuf; + u64 rbuf; struct rbdr_entry_t *desc; int err; @@ -177,10 +275,34 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, rbdr->head = 0; rbdr->tail = 0; + /* Initialize page recycling stuff. + * + * Can't use single buffer per page especially with 64K pages. + * On embedded platforms i.e 81xx/83xx available memory itself + * is low and minimum ring size of RBDR is 8K, that takes away + * lots of memory. + * + * But for XDP it has to be a single buffer per page. 
+ */ + if (!nic->pnicvf->xdp_prog) { + rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + rbdr->is_xdp = false; + } else { + rbdr->pgcnt = ring_len; + rbdr->is_xdp = true; + } + rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt); + rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) * + rbdr->pgcnt, GFP_KERNEL); + if (!rbdr->pgcache) + return -ENOMEM; + rbdr->pgidx = 0; + rbdr->pgalloc = 0; + nic->rb_page = NULL; for (idx = 0; idx < ring_len; idx++) { - err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, - &rbuf); + err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL, + RCV_FRAG_LEN, &rbuf); if (err) { /* To free already allocated and mapped ones */ rbdr->tail = idx - 1; @@ -188,7 +310,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); } nicvf_get_page(nic); @@ -201,6 +323,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) { int head, tail; u64 buf_addr, phys_addr; + struct pgcache *pgcache; struct rbdr_entry_t *desc; if (!rbdr) @@ -216,7 +339,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) /* Release page references */ while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); @@ -227,13 +350,31 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) } /* Release buffer of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); if (phys_addr) put_page(virt_to_page(phys_to_virt(phys_addr))); 
+ /* Sync page cache info */ + smp_rmb(); + + /* Release additional page references held for recycling */ + head = 0; + while (head < rbdr->pgcnt) { + pgcache = &rbdr->pgcache[head]; + if (pgcache->page && page_ref_count(pgcache->page) != 0) { + if (!rbdr->is_xdp) { + put_page(pgcache->page); + continue; + } + page_ref_sub(pgcache->page, pgcache->ref_count - 1); + put_page(pgcache->page); + } + head++; + } + /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); } @@ -248,7 +389,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) int refill_rb_cnt; struct rbdr *rbdr; struct rbdr_entry_t *desc; - u64 *rbuf; + u64 rbuf; int new_rb = 0; refill: @@ -269,17 +410,20 @@ refill: else refill_rb_cnt = qs->rbdr_len - qcount - 1; + /* Sync page cache info */ + smp_rmb(); + /* Start filling descs from tail */ tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; while (refill_rb_cnt) { tail++; tail &= (rbdr->dmem.q_len - 1); - if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf)) + if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf)) break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); refill_rb_cnt--; new_rb++; } @@ -362,7 +506,7 @@ static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) /* Initialize transmit queue */ static int nicvf_init_snd_queue(struct nicvf *nic, - struct snd_queue *sq, int q_len) + struct snd_queue *sq, int q_len, int qidx) { int err; @@ -375,17 +519,38 @@ static int nicvf_init_snd_queue(struct nicvf *nic, sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL); if (!sq->skbuff) return -ENOMEM; + sq->head = 0; sq->tail = 0; - atomic_set(&sq->free_cnt, q_len - 1); sq->thresh = SND_QUEUE_THRESH; - /* Preallocate memory for TSO segment's header */ - sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, - q_len * TSO_HEADER_SIZE, - &sq->tso_hdrs_phys, GFP_KERNEL); - if (!sq->tso_hdrs) - 
return -ENOMEM; + /* Check if this SQ is a XDP TX queue */ + if (nic->sqs_mode) + qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS); + if (qidx < nic->pnicvf->xdp_tx_queues) { + /* Alloc memory to save page pointers for XDP_TX */ + sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->xdp_page) + return -ENOMEM; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = q_len - 1; + sq->is_xdp = true; + } else { + sq->xdp_page = NULL; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = 0; + sq->is_xdp = false; + + atomic_set(&sq->free_cnt, q_len - 1); + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, + GFP_KERNEL); + if (!sq->tso_hdrs) + return -ENOMEM; + } return 0; } @@ -411,6 +576,7 @@ void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { struct sk_buff *skb; + struct page *page; struct sq_hdr_subdesc *hdr; struct sq_hdr_subdesc *tso_sqe; @@ -428,8 +594,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) smp_rmb(); while (sq->head != sq->tail) { skb = (struct sk_buff *)sq->skbuff[sq->head]; - if (!skb) + if (!skb || !sq->xdp_page) + goto next; + + page = (struct page *)sq->xdp_page[sq->head]; + if (!page) goto next; + else + put_page(page); + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); /* Check for dummy descriptor used for HW TSO offload on 88xx */ if (hdr->dont_send) { @@ -442,12 +615,14 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) nicvf_unmap_sndq_buffers(nic, sq, sq->head, hdr->subdesc_cnt); } - dev_kfree_skb_any(skb); + if (skb) + dev_kfree_skb_any(skb); next: sq->head++; sq->head &= (sq->dmem.q_len - 1); } kfree(sq->skbuff); + kfree(sq->xdp_page); nicvf_free_q_desc_mem(nic, &sq->dmem); } @@ -838,7 +1013,7 @@ static int nicvf_alloc_resources(struct nicvf *nic) /* Alloc send queue */ for (qidx = 
0; qidx < qs->sq_cnt; qidx++) { - if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len)) + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx)) goto alloc_fail; } @@ -876,6 +1051,7 @@ int nicvf_set_qset_resources(struct nicvf *nic) nic->rx_queues = qs->rq_cnt; nic->tx_queues = qs->sq_cnt; + nic->xdp_tx_queues = 0; return 0; } @@ -940,7 +1116,10 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) int qentry; qentry = sq->tail; - atomic_sub(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_sub(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt -= desc_cnt; sq->tail += desc_cnt; sq->tail &= (sq->dmem.q_len - 1); @@ -958,7 +1137,10 @@ static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, /* Free descriptor back to SQ for future use */ void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) { - atomic_add(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_add(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt += desc_cnt; sq->head += desc_cnt; sq->head &= (sq->dmem.q_len - 1); } @@ -1016,6 +1198,58 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, } } +/* XDP Transmit APIs */ +void nicvf_xdp_sq_doorbell(struct nicvf *nic, + struct snd_queue *sq, int sq_num) +{ + if (!sq->xdp_desc_cnt) + return; + + /* make sure all memory stores are done before ringing doorbell */ + wmb(); + + /* Inform HW to xmit all queued XDP descriptors */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, sq->xdp_desc_cnt); + sq->xdp_desc_cnt = 0; +} + +static inline void +nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, u64 data, int len) +{ + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + hdr->post_cqe = 1; + sq->xdp_page[qentry] = (u64)virt_to_page((void *)data); +} + +int
nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + int qentry; + + if (subdesc_cnt > sq->xdp_free_cnt) + return 0; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr); + + sq->xdp_desc_cnt += subdesc_cnt; + + return 1; +} + /* Calculate no of SQ subdescriptors needed to transmit all * segments of this TSO packet. * Taken from 'Tilera network driver' with a minor modification. @@ -1094,7 +1328,13 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, { int proto; struct sq_hdr_subdesc *hdr; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + ip.hdr = skb_network_header(skb); hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); memset(hdr, 0, SND_QUEUE_DESC_SIZE); hdr->subdesc_type = SQ_DESC_TYPE_HEADER; @@ -1119,7 +1359,9 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, hdr->l3_offset = skb_network_offset(skb); hdr->l4_offset = skb_transport_offset(skb); - proto = ip_hdr(skb)->protocol; + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + switch (proto) { case IPPROTO_TCP: hdr->csum_l4 = SEND_L4_CSUM_TCP; @@ -1366,8 +1608,33 @@ static inline unsigned frag_num(unsigned i) #endif } +static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, + u64 buf_addr, bool xdp) +{ + struct page *page = NULL; + int len = RCV_FRAG_LEN; + + if (xdp) { + page = virt_to_page(phys_to_virt(buf_addr)); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. 
+ */ + if (page_ref_count(page) != 1) + return; + + len += XDP_PACKET_HEADROOM; + /* Receive buffers in XDP mode are mapped from page start */ + dma_addr &= PAGE_MASK; + } + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); +} + /* Returns SKB for a received packet */ -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp) { int frag; int payload_len = 0; @@ -1402,10 +1669,9 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) if (!frag) { /* First fragment */ - dma_unmap_page_attrs(&nic->pdev->dev, - *rb_ptrs - cqe_rx->align_pad, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, + *rb_ptrs - cqe_rx->align_pad, + phys_addr, xdp); skb = nicvf_rb_ptr_to_skb(nic, phys_addr - cqe_rx->align_pad, payload_len); @@ -1415,9 +1681,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp); page = virt_to_page(phys_to_virt(phys_addr)); offset = phys_to_virt(phys_addr) - page_address(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, @@ -1547,9 +1811,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - if (!cqe_rx->err_level && !cqe_rx->err_opcode) - return 0; - if (netif_msg_rx_err(nic)) netdev_err(nic->netdev, "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", @@ -1638,8 +1899,6 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) { switch (cqe_tx->send_status) { - 
case CQ_TX_ERROP_GOOD: - return 0; case CQ_TX_ERROP_DESC_FAULT: this_cpu_inc(nic->drv_stats->tx_desc_fault); break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 10cb4b84625b..57858522c33c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -10,6 +10,7 @@ #define NICVF_QUEUES_H #include <linux/netdevice.h> +#include <linux/iommu.h> #include "q_struct.h" #define MAX_QUEUE_SET 128 @@ -213,6 +214,12 @@ struct q_desc_mem { void *unalign_base; }; +struct pgcache { + struct page *page; + int ref_count; + u64 dma_addr; +}; + struct rbdr { bool enable; u32 dma_size; @@ -222,6 +229,13 @@ struct rbdr { u32 head; u32 tail; struct q_desc_mem dmem; + bool is_xdp; + + /* For page recycling */ + int pgidx; + int pgcnt; + int pgalloc; + struct pgcache *pgcache; } ____cacheline_aligned_in_smp; struct rcv_queue { @@ -258,6 +272,10 @@ struct snd_queue { u32 tail; u64 *skbuff; void *desc; + u64 *xdp_page; + u16 xdp_desc_cnt; + u16 xdp_free_cnt; + bool is_xdp; #define TSO_HEADER_SIZE 128 /* For TSO segment's header */ @@ -301,6 +319,14 @@ struct queue_set { #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) +static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (nic->iommu_domain) + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); + return dma_addr; +} + void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, int hdr_sqe, u8 subdesc_cnt); void nicvf_config_vlan_stripping(struct nicvf *nic, @@ -318,8 +344,12 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, int qidx); int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num); +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len); +void 
nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num); -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp); void nicvf_rbdr_task(unsigned long data); void nicvf_rbdr_work(struct work_struct *work); diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h index f36347237a54..e47205aa87ea 100644 --- a/drivers/net/ethernet/cavium/thunder/q_struct.h +++ b/drivers/net/ethernet/cavium/thunder/q_struct.h @@ -359,15 +359,7 @@ union cq_desc_t { }; struct rbdr_entry_t { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 rsvd0:15; - u64 buf_addr:42; - u64 cache_align:7; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - u64 cache_align:7; - u64 buf_addr:42; - u64 rsvd0:15; -#endif + u64 buf_addr; }; /* TCP reassembly context */ |