diff options
Diffstat (limited to 'drivers/ntb')
| -rw-r--r-- | drivers/ntb/hw/amd/ntb_hw_amd.c | 19 | ||||
| -rw-r--r-- | drivers/ntb/hw/amd/ntb_hw_amd.h | 1 | ||||
| -rw-r--r-- | drivers/ntb/hw/epf/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/ntb/hw/epf/ntb_hw_epf.c | 118 | ||||
| -rw-r--r-- | drivers/ntb/hw/idt/ntb_hw_idt.c | 18 | ||||
| -rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen1.c | 14 | ||||
| -rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen3.c | 3 | ||||
| -rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen4.c | 22 | ||||
| -rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen4.h | 2 | ||||
| -rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_intel.h | 6 | ||||
| -rw-r--r-- | drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 16 | ||||
| -rw-r--r-- | drivers/ntb/msi.c | 86 | ||||
| -rw-r--r-- | drivers/ntb/ntb_transport.c | 294 | ||||
| -rw-r--r-- | drivers/ntb/test/ntb_msi_test.c | 2 | ||||
| -rw-r--r-- | drivers/ntb/test/ntb_perf.c | 4 | ||||
| -rw-r--r-- | drivers/ntb/test/ntb_pingpong.c | 3 | ||||
| -rw-r--r-- | drivers/ntb/test/ntb_tool.c | 2 |
17 files changed, 325 insertions, 286 deletions
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index d687e8c2cc78..1a163596ddf5 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -197,13 +197,22 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, static int amd_ntb_get_link_status(struct amd_ntb_dev *ndev) { - struct pci_dev *pdev = NULL; + struct pci_dev *pdev = ndev->ntb.pdev; struct pci_dev *pci_swds = NULL; struct pci_dev *pci_swus = NULL; u32 stat; int rc; if (ndev->ntb.topo == NTB_TOPO_SEC) { + if (ndev->dev_data->is_endpoint) { + rc = pcie_capability_read_dword(pdev, PCI_EXP_LNKCTL, &stat); + if (rc) + return rc; + + ndev->lnk_sta = stat; + return 0; + } + /* Locate the pointer to Downstream Switch for this device */ pci_swds = pci_upstream_bridge(ndev->ntb.pdev); if (pci_swds) { @@ -1311,6 +1320,11 @@ static const struct ntb_dev_data dev_data[] = { .mw_count = 2, .mw_idx = 2, }, + { /* for device 0x17d7 */ + .mw_count = 2, + .mw_idx = 2, + .is_endpoint = true, + }, }; static const struct pci_device_id amd_ntb_pci_tbl[] = { @@ -1318,6 +1332,9 @@ static const struct pci_device_id amd_ntb_pci_tbl[] = { { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x155a), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x17d4), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x17d7), (kernel_ulong_t)&dev_data[2] }, { PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] }, { 0, } }; diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h index 5f337b1572a0..e8c3165fa38b 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.h +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h @@ -168,6 +168,7 @@ enum { struct ntb_dev_data { const unsigned char mw_count; const unsigned int mw_idx; + const bool is_endpoint; }; struct amd_ntb_dev; diff --git a/drivers/ntb/hw/epf/Kconfig b/drivers/ntb/hw/epf/Kconfig index 6197d1aab344..314485574bf8 100644 --- a/drivers/ntb/hw/epf/Kconfig +++ b/drivers/ntb/hw/epf/Kconfig @@ -1,6 +1,5 @@ config NTB_EPF tristate "Generic EPF Non-Transparent Bridge support" - depends on m help This driver supports EPF NTB on configurable endpoint. If unsure, say N. diff --git a/drivers/ntb/hw/epf/ntb_hw_epf.c b/drivers/ntb/hw/epf/ntb_hw_epf.c index 00f0e78f685b..d3ecf25a5162 100644 --- a/drivers/ntb/hw/epf/ntb_hw_epf.c +++ b/drivers/ntb/hw/epf/ntb_hw_epf.c @@ -49,6 +49,7 @@ #define NTB_EPF_COMMAND_TIMEOUT 1000 /* 1 Sec */ enum pci_barno { + NO_BAR = -1, BAR_0, BAR_1, BAR_2, @@ -57,16 +58,26 @@ enum pci_barno { BAR_5, }; +enum epf_ntb_bar { + BAR_CONFIG, + BAR_PEER_SPAD, + BAR_DB, + BAR_MW1, + BAR_MW2, + BAR_MW3, + BAR_MW4, + NTB_BAR_NUM, +}; + +#define NTB_EPF_MAX_MW_COUNT (NTB_BAR_NUM - BAR_MW1) + struct ntb_epf_dev { struct ntb_dev ntb; struct device *dev; /* Mutex to protect providing commands to NTB EPF */ struct mutex cmd_lock; - enum pci_barno ctrl_reg_bar; - enum pci_barno peer_spad_reg_bar; - enum pci_barno db_reg_bar; - enum pci_barno mw_bar; + const enum pci_barno *barno_map; unsigned int mw_count; unsigned int spad_count; @@ -85,17 +96,6 @@ struct ntb_epf_dev { #define ntb_ndev(__ntb) container_of(__ntb, struct ntb_epf_dev, ntb) -struct ntb_epf_data { - /* BAR that contains both control region and self spad region */ - enum pci_barno ctrl_reg_bar; - /* BAR that contains peer spad region */ - enum pci_barno peer_spad_reg_bar; - /* BAR that contains Doorbell region and Memory window '1' */ - enum pci_barno db_reg_bar; - /* BAR that contains memory windows*/ - enum pci_barno mw_bar; -}; - static int ntb_epf_send_command(struct ntb_epf_dev *ndev, u32 command, u32 argument) { @@ -144,7 +144,7 @@ static int ntb_epf_mw_to_bar(struct ntb_epf_dev *ndev, int idx) return -EINVAL; } - return idx + 2; + return ndev->barno_map[BAR_MW1 + idx]; } static int ntb_epf_mw_count(struct ntb_dev *ntb, int pidx) @@ -413,7 +413,9 @@ static int ntb_epf_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, return -EINVAL; } - bar = idx + ndev->mw_bar; + bar = ntb_epf_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; mw_size = pci_resource_len(ntb->pdev, bar); @@ -455,7 +457,9 @@ static int ntb_epf_peer_mw_get_addr(struct ntb_dev *ntb, int idx, if (idx == 0) offset = readl(ndev->ctrl_reg + NTB_EPF_MW1_OFFSET); - bar = idx + ndev->mw_bar; + bar = ntb_epf_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; if (base) *base = pci_resource_start(ndev->ntb.pdev, bar) + offset; @@ -560,6 +564,11 @@ static int ntb_epf_init_dev(struct ntb_epf_dev *ndev) ndev->mw_count = readl(ndev->ctrl_reg + NTB_EPF_MW_COUNT); ndev->spad_count = readl(ndev->ctrl_reg + NTB_EPF_SPAD_COUNT); + if (ndev->mw_count > NTB_EPF_MAX_MW_COUNT) { + dev_err(dev, "Unsupported MW count: %u\n", ndev->mw_count); + return -EINVAL; + } + return 0; } @@ -596,14 +605,15 @@ static int ntb_epf_init_pci(struct ntb_epf_dev *ndev, dev_warn(&pdev->dev, "Cannot DMA highmem\n"); } - ndev->ctrl_reg = pci_iomap(pdev, ndev->ctrl_reg_bar, 0); + ndev->ctrl_reg = pci_iomap(pdev, ndev->barno_map[BAR_CONFIG], 0); if (!ndev->ctrl_reg) { ret = -EIO; goto err_pci_regions; } - if (ndev->peer_spad_reg_bar) { - ndev->peer_spad_reg = pci_iomap(pdev, ndev->peer_spad_reg_bar, 0); + if (ndev->barno_map[BAR_PEER_SPAD] != ndev->barno_map[BAR_CONFIG]) { + ndev->peer_spad_reg = pci_iomap(pdev, + ndev->barno_map[BAR_PEER_SPAD], 0); if (!ndev->peer_spad_reg) { ret = -EIO; goto err_pci_regions; @@ -614,7 +624,7 @@ static int ntb_epf_init_pci(struct ntb_epf_dev *ndev, ndev->peer_spad_reg = ndev->ctrl_reg + spad_off + spad_sz; } - ndev->db_reg = pci_iomap(pdev, ndev->db_reg_bar, 0); + ndev->db_reg = pci_iomap(pdev, ndev->barno_map[BAR_DB], 0); if (!ndev->db_reg) { ret = -EIO; goto err_pci_regions; @@ -659,12 +669,7 @@ static void ntb_epf_cleanup_isr(struct ntb_epf_dev *ndev) static int ntb_epf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - enum pci_barno peer_spad_reg_bar = BAR_1; - enum pci_barno ctrl_reg_bar = BAR_0; - enum pci_barno db_reg_bar = BAR_2; - enum pci_barno mw_bar = BAR_2; struct device *dev = &pdev->dev; - struct ntb_epf_data *data; struct ntb_epf_dev *ndev; int ret; @@ -675,18 +680,10 @@ static int ntb_epf_pci_probe(struct pci_dev *pdev, if (!ndev) return -ENOMEM; - data = (struct ntb_epf_data *)id->driver_data; - if (data) { - peer_spad_reg_bar = data->peer_spad_reg_bar; - ctrl_reg_bar = data->ctrl_reg_bar; - db_reg_bar = data->db_reg_bar; - mw_bar = data->mw_bar; - } + ndev->barno_map = (const enum pci_barno *)id->driver_data; + if (!ndev->barno_map) + return -EINVAL; - ndev->peer_spad_reg_bar = peer_spad_reg_bar; - ndev->ctrl_reg_bar = ctrl_reg_bar; - ndev->db_reg_bar = db_reg_bar; - ndev->mw_bar = mw_bar; ndev->dev = dev; ntb_epf_init_struct(ndev, pdev); @@ -730,30 +727,51 @@ static void ntb_epf_pci_remove(struct pci_dev *pdev) ntb_epf_deinit_pci(ndev); } -static const struct ntb_epf_data j721e_data = { - .ctrl_reg_bar = BAR_0, - .peer_spad_reg_bar = BAR_1, - .db_reg_bar = BAR_2, - .mw_bar = BAR_2, +static const enum pci_barno j721e_map[NTB_BAR_NUM] = { + [BAR_CONFIG] = BAR_0, + [BAR_PEER_SPAD] = BAR_1, + [BAR_DB] = BAR_2, + [BAR_MW1] = BAR_2, + [BAR_MW2] = BAR_3, + [BAR_MW3] = BAR_4, + [BAR_MW4] = BAR_5 }; -static const struct ntb_epf_data mx8_data = { - .ctrl_reg_bar = BAR_0, - .peer_spad_reg_bar = BAR_0, - .db_reg_bar = BAR_2, - .mw_bar = BAR_4, +static const enum pci_barno mx8_map[NTB_BAR_NUM] = { + [BAR_CONFIG] = BAR_0, + [BAR_PEER_SPAD] = BAR_0, + [BAR_DB] = BAR_2, + [BAR_MW1] = BAR_4, + [BAR_MW2] = BAR_5, + [BAR_MW3] = NO_BAR, + [BAR_MW4] = NO_BAR +}; + +static const enum pci_barno rcar_barno[NTB_BAR_NUM] = { + [BAR_CONFIG] = BAR_0, + [BAR_PEER_SPAD] = BAR_0, + [BAR_DB] = BAR_4, + [BAR_MW1] = BAR_2, + [BAR_MW2] = NO_BAR, + [BAR_MW3] = NO_BAR, + [BAR_MW4] = NO_BAR, }; static const struct pci_device_id ntb_epf_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E), .class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00, - .driver_data = (kernel_ulong_t)&j721e_data, + .driver_data = (kernel_ulong_t)j721e_map, }, { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x0809), .class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00, - .driver_data = (kernel_ulong_t)&mx8_data, + .driver_data = (kernel_ulong_t)mx8_map, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0030), + .class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00, + .driver_data = (kernel_ulong_t)rcar_barno, }, { }, }; diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 544d8a4d2af5..f27df8d7f3b9 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -1041,7 +1041,7 @@ static inline char *idt_get_mw_name(enum idt_mw_type mw_type) static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, unsigned char *mw_cnt) { - struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws; + struct idt_mw_cfg *mws; const struct idt_ntb_bar *bars; enum idt_mw_type mw_type; unsigned char widx, bidx, en_cnt; @@ -1049,6 +1049,11 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, int aprt_size; u32 data; + mws = devm_kcalloc(&ndev->ntb.pdev->dev, IDT_MAX_NR_MWS, + sizeof(*mws), GFP_KERNEL); + if (!mws) + return ERR_PTR(-ENOMEM); + /* Retrieve the array of the BARs registers */ bars = portdata_tbl[port].bars; @@ -1103,16 +1108,7 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, } } - /* Allocate memory for memory window descriptors */ - ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws), - GFP_KERNEL); - if (!ret_mws) - return ERR_PTR(-ENOMEM); - - /* Copy the info of detected memory windows */ - memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws)); - - return ret_mws; + return mws; } /* diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 079b8cd79785..944d10b48ae4 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -763,7 +763,8 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, return ndev_ntb_debugfs_read(filp, ubuf, count, offp); else if (pdev_is_gen3(ndev->ntb.pdev)) return ndev_ntb3_debugfs_read(filp, ubuf, count, offp); - else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev)) + else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev) || + pdev_is_gen6(ndev->ntb.pdev)) return ndev_ntb4_debugfs_read(filp, ubuf, count, offp); return -ENXIO; @@ -1872,7 +1873,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, rc = gen3_init_dev(ndev); if (rc) goto err_init_dev; - } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) { + } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || + pdev_is_gen6(pdev)) { ndev->ntb.ops = &intel_ntb4_ops; rc = intel_ntb_init_pci(ndev, pdev); if (rc) @@ -1903,7 +1905,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, err_register: ndev_deinit_debugfs(ndev); if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || - pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) + pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || + pdev_is_gen6(pdev)) xeon_deinit_dev(ndev); err_init_dev: intel_ntb_deinit_pci(ndev); @@ -1920,7 +1923,8 @@ static void intel_ntb_pci_remove(struct pci_dev *pdev) ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || - pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) + pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || + pdev_is_gen6(pdev)) xeon_deinit_dev(ndev); intel_ntb_deinit_pci(ndev); kfree(ndev); @@ -2049,6 +2053,8 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_ICX)}, /* GEN5 PCIe */ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_GNR)}, + /* GEN6 PCIe */ + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_DMR)}, {0} }; MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c index ffcfc3e02c35..a5aa96a31f4a 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c @@ -215,6 +215,9 @@ static int gen3_init_ntb(struct intel_ntb_dev *ndev) } ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + /* Make sure we are not using DB's used for link status */ + if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) + ndev->db_valid_mask &= ~ndev->db_link_mask; ndev->reg->db_iowrite(ndev->db_valid_mask, ndev->self_mmio + diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c index 22cac7975b3c..a221a978a9d5 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -46,6 +46,16 @@ static const struct intel_ntb_alt_reg gen4_b2b_reg = { .spad = GEN4_EM_SPAD_OFFSET, }; +static u64 get_ppd0(struct pci_dev *pdev) +{ + if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) + return GEN4_PPD0_OFFSET; + else if (pdev_is_gen6(pdev)) + return GEN6_PPD0_OFFSET; + + return ULLONG_MAX; +} + static int gen4_poll_link(struct intel_ntb_dev *ndev) { u16 reg_val; @@ -183,7 +193,7 @@ static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) int gen4_init_dev(struct intel_ntb_dev *ndev) { struct pci_dev *pdev = ndev->ntb.pdev; - u32 ppd1/*, ppd0*/; + u32 ppd1; u16 lnkctl; int rc; @@ -197,7 +207,7 @@ int gen4_init_dev(struct intel_ntb_dev *ndev) ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); if (pdev_is_ICX(pdev)) ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); - else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev)) + else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev) || pdev_is_gen6(pdev)) ndev->ntb.topo = spr_ppd_topo(ndev, ppd1); dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1, ntb_topo_string(ndev->ntb.topo)); @@ -432,10 +442,12 @@ static int intel_ntb4_link_enable(struct ntb_dev *ntb, enum ntb_speed max_speed, enum ntb_width max_width) { struct intel_ntb_dev *ndev; + struct pci_dev *pdev; u32 ntb_ctl, ppd0; u16 lnkctl; ndev = container_of(ntb, struct intel_ntb_dev, ntb); + pdev = ntb->pdev; dev_dbg(&ntb->pdev->dev, "Enabling link with max_speed %d max_width %d\n", @@ -476,12 +488,12 @@ static int intel_ntb4_link_enable(struct ntb_dev *ntb, iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); /* start link training in PPD0 */ - ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); + ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev)); ppd0 |= GEN4_PPD_LINKTRN; - iowrite32(ppd0, ndev->self_mmio + GEN4_PPD0_OFFSET); + iowrite32(ppd0, ndev->self_mmio + get_ppd0(pdev)); /* make sure link training has started */ - ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); + ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev)); if (!(ppd0 & GEN4_PPD_LINKTRN)) { dev_warn(&ntb->pdev->dev, "Link is not training\n"); return -ENXIO; diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h index f91323eaf5ce..1ba8203d6352 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h @@ -103,6 +103,8 @@ #define NTB_LTR_IDLE_LATSCALE 0x0800 /* 1us scale */ #define NTB_LTR_IDLE_REQMNT 0x8000 /* snoop req enable */ +#define GEN6_PPD0_OFFSET 0xf0d4 + ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp); int gen4_init_dev(struct intel_ntb_dev *ndev); diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index da4d5fe55bab..0a3a3677f82a 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -71,6 +71,7 @@ #define PCI_DEVICE_ID_INTEL_NTB_B2B_SKX 0x201C #define PCI_DEVICE_ID_INTEL_NTB_B2B_ICX 0x347e #define PCI_DEVICE_ID_INTEL_NTB_B2B_GNR 0x0db4 +#define PCI_DEVICE_ID_INTEL_NTB_B2B_DMR 0x7868 /* Ntb control and link status */ #define NTB_CTL_CFG_LOCK BIT(0) @@ -235,4 +236,9 @@ static inline int pdev_is_gen5(struct pci_dev *pdev) return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_GNR; } +static inline int pdev_is_gen6(struct pci_dev *pdev) +{ + return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_DMR; +} + #endif diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index ad1786be2554..e38540b92716 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -29,7 +29,7 @@ MODULE_PARM_DESC(use_lut_mws, "Enable the use of the LUT based memory windows"); #define SWITCHTEC_NTB_MAGIC 0x45CC0001 -#define MAX_MWS 128 +#define MAX_MWS 256 struct shared_mw { u32 magic; @@ -288,7 +288,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (size != 0 && xlate_pos < 12) return -EINVAL; - if (!IS_ALIGNED(addr, BIT_ULL(xlate_pos))) { + if (xlate_pos >= 0 && !IS_ALIGNED(addr, BIT_ULL(xlate_pos))) { /* * In certain circumstances we can get a buffer that is * not aligned to its size. (Most of the time @@ -1202,7 +1202,8 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev) sndev->mmio_self_ctrl); sndev->nr_lut_mw = ioread16(&sndev->mmio_self_ctrl->lut_table_entries); - sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw); + if (sndev->nr_lut_mw) + sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw); dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut\n", sndev->nr_direct_mw, sndev->nr_lut_mw); @@ -1212,7 +1213,8 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev) sndev->peer_nr_lut_mw = ioread16(&sndev->mmio_peer_ctrl->lut_table_entries); - sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw); + if (sndev->peer_nr_lut_mw) + sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw); dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut\n", sndev->peer_nr_direct_mw, sndev->peer_nr_lut_mw); @@ -1314,6 +1316,12 @@ static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev) for (i = 0; i < sndev->nr_lut_mw; i++) { int idx = sndev->nr_direct_mw + i; + if (idx >= MAX_MWS) { + dev_err(&sndev->stdev->dev, + "Total number of MW cannot be bigger than %d", MAX_MWS); + break; + } + sndev->self_shared->mw_sizes[idx] = LUT_SIZE; } } diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 6295e55ef85e..6817d504c12a 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -106,10 +106,10 @@ int ntb_msi_setup_mws(struct ntb_dev *ntb) if (!ntb->msi) return -EINVAL; - msi_lock_descs(&ntb->pdev->dev); - desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED); - addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); - msi_unlock_descs(&ntb->pdev->dev); + scoped_guard (msi_descs_lock, &ntb->pdev->dev) { + desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED); + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); + } for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { peer_widx = ntb_peer_highest_mw_idx(ntb, peer); @@ -289,7 +289,7 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, if (!ntb->msi) return -EINVAL; - msi_lock_descs(dev); + guard(msi_descs_lock)(dev); msi_for_each_desc(entry, dev, MSI_DESC_ASSOCIATED) { if (irq_has_action(entry->irq)) continue; @@ -307,51 +307,14 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); if (ret) { devm_free_irq(&ntb->dev, entry->irq, dev_id); - goto unlock; + return ret; } - - ret = entry->irq; - goto unlock; + return entry->irq; } - ret = -ENODEV; - -unlock: - msi_unlock_descs(dev); - return ret; + return -ENODEV; } EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); -static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) -{ - struct ntb_dev *ntb = dev_ntb(dev); - struct ntb_msi_devres *dr = res; - - return dr->ntb == ntb && dr->entry == data; -} - -/** - * ntbm_msi_free_irq() - free an interrupt - * @ntb: NTB device context - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * This function should be used to manually free IRQs allocated with - * ntbm_request_[threaded_]irq(). - */ -void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) -{ - struct msi_desc *entry = irq_get_msi_desc(irq); - - entry->write_msi_msg = NULL; - entry->write_msi_msg_data = NULL; - - WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, - ntbm_msi_callback_match, entry)); - - devm_free_irq(&ntb->dev, irq, dev_id); -} -EXPORT_SYMBOL(ntbm_msi_free_irq); - /** * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer * @ntb: NTB device context @@ -379,36 +342,3 @@ int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, return 0; } EXPORT_SYMBOL(ntb_msi_peer_trigger); - -/** - * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt - * @ntb: NTB device context - * @peer: Peer index - * @desc: MSI descriptor data which triggers the interrupt - * @msi_addr: Physical address to trigger the interrupt - * - * This function allows using DMA engines to trigger an interrupt - * (for example, trigger an interrupt to process the data after - * sending it). To trigger the interrupt, write @desc.data to the address - * returned in @msi_addr - * - * Return: Zero on success, otherwise a negative error number. - */ -int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, - struct ntb_msi_desc *desc, - phys_addr_t *msi_addr) -{ - int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; - phys_addr_t mw_phys_addr; - int ret; - - ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); - if (ret) - return ret; - - if (msi_addr) - *msi_addr = mw_phys_addr + desc->addr_offset; - - return 0; -} -EXPORT_SYMBOL(ntb_msi_peer_addr); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index a22ea4a4b202..7cabc82305d6 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -54,11 +54,15 @@ #include <linux/errno.h> #include <linux/export.h> #include <linux/interrupt.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/seq_file.h> #include <linux/types.h> #include <linux/uaccess.h> +#include <linux/mutex.h> +#include <linux/wait.h> #include "linux/ntb.h" #include "linux/ntb_transport.h" @@ -99,6 +103,10 @@ module_param(use_msi, bool, 0644); MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); #endif +static bool tx_memcpy_offload; +module_param(tx_memcpy_offload, bool, 0644); +MODULE_PARM_DESC(tx_memcpy_offload, "Offload TX memcpy_toio() to a kernel thread"); + static struct dentry *nt_debugfs_dir; /* Only two-ports NTB devices are supported */ @@ -112,7 +120,6 @@ struct ntb_queue_entry { void *buf; unsigned int len; unsigned int flags; - int retries; int errors; unsigned int tx_index; unsigned int rx_index; @@ -148,7 +155,9 @@ struct ntb_transport_qp { void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); struct list_head tx_free_q; + struct list_head tx_offl_q; spinlock_t ntb_tx_free_q_lock; + spinlock_t ntb_tx_offl_q_lock; void __iomem *tx_mw; phys_addr_t tx_mw_phys; size_t tx_mw_size; @@ -199,6 +208,9 @@ struct ntb_transport_qp { int msi_irq; struct ntb_msi_desc msi_desc; struct ntb_msi_desc peer_msi_desc; + + struct task_struct *tx_offload_thread; + wait_queue_head_t tx_offload_wq; }; struct ntb_transport_mw { @@ -241,6 +253,9 @@ struct ntb_transport_ctx { struct work_struct link_cleanup; struct dentry *debugfs_node_dir; + + /* Make sure workq of link event be executed serially */ + struct mutex link_event_lock; }; enum { @@ -281,8 +296,14 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset); static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset); static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset); +static int ntb_tx_memcpy_kthread(void *data); +static inline bool ntb_tx_offload_enabled(struct ntb_transport_qp *qp) +{ + return tx_memcpy_offload && qp && qp->tx_offload_thread; +} + static int ntb_transport_bus_match(struct device *dev, const struct device_driver *drv) { @@ -462,104 +483,49 @@ void ntb_transport_unregister_client(struct ntb_transport_client *drv) } EXPORT_SYMBOL_GPL(ntb_transport_unregister_client); -static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, - loff_t *offp) +static int ntb_qp_debugfs_stats_show(struct seq_file *s, void *v) { - struct ntb_transport_qp *qp; - char *buf; - ssize_t ret, out_offset, out_count; - - qp = filp->private_data; + struct ntb_transport_qp *qp = s->private; if (!qp || !qp->link_is_up) return 0; - out_count = 1000; - - buf = kmalloc(out_count, GFP_KERNEL); - if (!buf) - return -ENOMEM; + seq_puts(s, "\nNTB QP stats:\n\n"); + + seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes); + seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts); + seq_printf(s, "rx_memcpy - \t%llu\n", qp->rx_memcpy); + seq_printf(s, "rx_async - \t%llu\n", qp->rx_async); + seq_printf(s, "rx_ring_empty - %llu\n", qp->rx_ring_empty); + seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); + seq_printf(s, "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); + seq_printf(s, "rx_err_ver - \t%llu\n", qp->rx_err_ver); + seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff); + seq_printf(s, "rx_index - \t%u\n", qp->rx_index); + seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry); + seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); + + seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes); + seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts); + seq_printf(s, "tx_memcpy - \t%llu\n", qp->tx_memcpy); + seq_printf(s, "tx_async - \t%llu\n", qp->tx_async); + seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full); + seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); + seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw); + seq_printf(s, "tx_index (H) - \t%u\n", qp->tx_index); + seq_printf(s, "RRI (T) - \t%u\n", qp->remote_rx_info->entry); + seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry); + seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); + seq_putc(s, '\n'); + + seq_printf(s, "Using TX DMA - \t%s\n", qp->tx_dma_chan ? "Yes" : "No"); + seq_printf(s, "Using RX DMA - \t%s\n", qp->rx_dma_chan ? "Yes" : "No"); + seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); + seq_putc(s, '\n'); - out_offset = 0; - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "\nNTB QP stats:\n\n"); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_bytes - \t%llu\n", qp->rx_bytes); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_pkts - \t%llu\n", qp->rx_pkts); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_memcpy - \t%llu\n", qp->rx_memcpy); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_async - \t%llu\n", qp->rx_async); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_ring_empty - %llu\n", qp->rx_ring_empty); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_err_ver - \t%llu\n", qp->rx_err_ver); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_buff - \t0x%p\n", qp->rx_buff); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_index - \t%u\n", qp->rx_index); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n", qp->rx_max_entry); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); - - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_bytes - \t%llu\n", qp->tx_bytes); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_pkts - \t%llu\n", qp->tx_pkts); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_memcpy - \t%llu\n", qp->tx_memcpy); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_async - \t%llu\n", qp->tx_async); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_ring_full - \t%llu\n", qp->tx_ring_full); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_mw - \t0x%p\n", qp->tx_mw); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_index (H) - \t%u\n", qp->tx_index); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "RRI (T) - \t%u\n", - qp->remote_rx_info->entry); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "tx_max_entry - \t%u\n", qp->tx_max_entry); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "free tx - \t%u\n", - ntb_transport_tx_free_entry(qp)); - - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "\n"); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "Using TX DMA - \t%s\n", - qp->tx_dma_chan ? "Yes" : "No"); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "Using RX DMA - \t%s\n", - qp->rx_dma_chan ? "Yes" : "No"); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "QP Link - \t%s\n", - qp->link_is_up ? "Up" : "Down"); - out_offset += scnprintf(buf + out_offset, out_count - out_offset, - "\n"); - - if (out_offset > out_count) - out_offset = out_count; - - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); - kfree(buf); - return ret; -} - -static const struct file_operations ntb_qp_debugfs_stats = { - .owner = THIS_MODULE, - .open = simple_open, - .read = debugfs_read, -}; + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ntb_qp_debugfs_stats); static void ntb_list_add(spinlock_t *lock, struct list_head *entry, struct list_head *list) @@ -793,13 +759,13 @@ static void ntb_transport_msi_desc_changed(void *data) static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; - struct pci_dev *pdev = nt->ndev->pdev; + struct device *dma_dev = ntb_get_dma_dev(nt->ndev); if (!mw->virt_addr) return; ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); - dma_free_coherent(&pdev->dev, mw->alloc_size, + dma_free_coherent(dma_dev, mw->alloc_size, mw->alloc_addr, mw->dma_addr); mw->xlat_size = 0; mw->buff_size = 0; @@ -869,7 +835,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, resource_size_t size) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; - struct pci_dev *pdev = nt->ndev->pdev; + struct device *dma_dev = ntb_get_dma_dev(nt->ndev); size_t xlat_size, buff_size; resource_size_t xlat_align; resource_size_t xlat_align_size; @@ -898,12 +864,12 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, mw->buff_size = buff_size; mw->alloc_size = buff_size; - rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); + rc = ntb_alloc_mw_buffer(mw, dma_dev, xlat_align); if (rc) { mw->alloc_size *= 2; - rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); + rc = ntb_alloc_mw_buffer(mw, dma_dev, xlat_align); if (rc) { - dev_err(&pdev->dev, + dev_err(dma_dev, "Unable to alloc aligned MW buff\n"); mw->xlat_size = 0; mw->buff_size = 0; @@ -916,7 +882,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr, mw->xlat_size); if (rc) { - dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw); + dev_err(dma_dev, "Unable to set mw%d translation", num_mw); ntb_free_mw(nt, num_mw); return -EIO; } @@ -1024,6 +990,7 @@ static void ntb_transport_link_cleanup_work(struct work_struct *work) struct ntb_transport_ctx *nt = container_of(work, struct ntb_transport_ctx, link_cleanup); + guard(mutex)(&nt->link_event_lock); ntb_transport_link_cleanup(nt); } @@ -1047,6 +1014,8 @@ static void ntb_transport_link_work(struct work_struct *work) u32 val; int rc = 0, i, spad; + guard(mutex)(&nt->link_event_lock); + /* send the local info, in the opposite order of the way we read it */ if (nt->use_msi) { @@ -1229,15 +1198,15 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->tx_max_entry = tx_size / qp->tx_max_frame; if (nt->debugfs_node_dir) { - char debugfs_name[4]; + char debugfs_name[8]; - snprintf(debugfs_name, 4, "qp%d", qp_num); + snprintf(debugfs_name, sizeof(debugfs_name), "qp%d", qp_num); qp->debugfs_dir = debugfs_create_dir(debugfs_name, nt->debugfs_node_dir); qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, qp->debugfs_dir, qp, - &ntb_qp_debugfs_stats); + &ntb_qp_debugfs_stats_fops); } else { qp->debugfs_dir = NULL; qp->debugfs_stats = NULL; @@ -1248,11 +1217,13 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, spin_lock_init(&qp->ntb_rx_q_lock); spin_lock_init(&qp->ntb_tx_free_q_lock); + spin_lock_init(&qp->ntb_tx_offl_q_lock); INIT_LIST_HEAD(&qp->rx_post_q); INIT_LIST_HEAD(&qp->rx_pend_q); INIT_LIST_HEAD(&qp->rx_free_q); INIT_LIST_HEAD(&qp->tx_free_q); + INIT_LIST_HEAD(&qp->tx_offl_q); tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db, (unsigned long)qp); @@ -1353,7 +1324,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_count = ilog2(qp_bitmap); if (nt->use_msi) { qp_count -= 1; - nt->msi_db_mask = 1 << qp_count; + nt->msi_db_mask = BIT_ULL(qp_count); ntb_db_clear_mask(ndev, nt->msi_db_mask); } @@ -1387,6 +1358,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) goto err2; } + mutex_init(&nt->link_event_lock); INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work); @@ -1563,15 +1535,15 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) goto err; unmap->len = len; - unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), - pay_off, len, DMA_TO_DEVICE); + unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(offset), + len, DMA_TO_DEVICE, 0); if (dma_mapping_error(device->dev, unmap->addr[0])) goto err_get_unmap; unmap->to_cnt = 1; - unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf), - buff_off, len, DMA_FROM_DEVICE); + unmap->addr[1] = dma_map_phys(device->dev, virt_to_phys(buf), + len, DMA_FROM_DEVICE, 0); if (dma_mapping_error(device->dev, unmap->addr[1])) goto err_get_unmap; @@ -1623,9 +1595,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) if (res < 0) goto err; - if (!entry->retries) - qp->rx_async++; - + qp->rx_async++; return; err: @@ -1780,6 +1750,13 @@ static void ntb_tx_copy_callback(void *data, iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); + /* + * Make DONE flag visible before DB/MSI. WC + posted MWr may reorder + * across iATU/bridge (platform-dependent). Order and flush here. + */ + dma_mb(); + ioread32(&hdr->flags); + if (qp->use_msi) ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); else @@ -1800,14 +1777,15 @@ static void ntb_tx_copy_callback(void *data, ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); } -static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) +static void ntb_memcpy_tx_on_stack(struct ntb_queue_entry *entry, void __iomem *offset) { -#ifdef ARCH_HAS_NOCACHE_UACCESS +#ifdef copy_to_nontemporal /* * Using non-temporal mov to improve performance on non-cached - * writes, even though we aren't actually copying from user space. + * writes. This only works if __iomem is strictly memory-like, + * but that is the case on x86-64 */ - __copy_from_user_inatomic_nocache(offset, entry->buf, entry->len); + copy_to_nontemporal(offset, entry->buf, entry->len); #else memcpy_toio(offset, entry->buf, entry->len); #endif @@ -1818,6 +1796,54 @@ static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) ntb_tx_copy_callback(entry, NULL); } +static int ntb_tx_memcpy_kthread(void *data) +{ + struct ntb_transport_qp *qp = data; + struct ntb_queue_entry *entry, *tmp; + const int resched_nr = 64; + LIST_HEAD(local_list); + void __iomem *offset; + int processed = 0; + + while (!kthread_should_stop()) { + spin_lock_irq(&qp->ntb_tx_offl_q_lock); + wait_event_interruptible_lock_irq_timeout(qp->tx_offload_wq, + kthread_should_stop() || + !list_empty(&qp->tx_offl_q), + qp->ntb_tx_offl_q_lock, 5*HZ); + list_splice_tail_init(&qp->tx_offl_q, &local_list); + spin_unlock_irq(&qp->ntb_tx_offl_q_lock); + + list_for_each_entry_safe(entry, tmp, &local_list, entry) { + list_del(&entry->entry); + offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index; + ntb_memcpy_tx_on_stack(entry, offset); + if (++processed >= resched_nr) { + cond_resched(); + processed = 0; + } + } + cond_resched(); + } + + return 0; +} + +static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) +{ + struct ntb_transport_qp *qp = entry->qp; + + if (WARN_ON_ONCE(!qp)) + return; + + if (ntb_tx_offload_enabled(qp)) { + ntb_list_add(&qp->ntb_tx_offl_q_lock, &entry->entry, + &qp->tx_offl_q); + wake_up(&qp->tx_offload_wq); + } else + ntb_memcpy_tx_on_stack(entry, offset); +} + static int ntb_async_tx_submit(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) { @@ -1844,8 +1870,8 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, goto err; unmap->len = len; - unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf), - buff_off, len, DMA_TO_DEVICE); + unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(buf), + len, DMA_TO_DEVICE, 0); if (dma_mapping_error(device->dev, unmap->addr[0])) goto err_get_unmap; @@ -1890,6 +1916,9 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); entry->tx_hdr = hdr; + WARN_ON_ONCE(!ntb_transport_tx_free_entry(qp)); + WRITE_ONCE(qp->tx_index, (qp->tx_index + 1) % qp->tx_max_entry); + iowrite32(entry->len, &hdr->len); iowrite32((u32)qp->tx_pkts, &hdr->ver); @@ -1903,9 +1932,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, if (res < 0) goto err; - if (!entry->retries) - qp->tx_async++; - + qp->tx_async++; return; err: @@ -1932,9 +1959,6 @@ static int ntb_process_tx(struct ntb_transport_qp *qp, ntb_async_tx(qp, entry); - qp->tx_index++; - qp->tx_index %= qp->tx_max_entry; - qp->tx_pkts++; return 0; @@ -2031,6 +2055,20 @@ ntb_transport_create_queue(void *data, struct device *client_dev, qp->tx_handler = handlers->tx_handler; qp->event_handler = handlers->event_handler; + init_waitqueue_head(&qp->tx_offload_wq); + if (tx_memcpy_offload) { + qp->tx_offload_thread = kthread_run(ntb_tx_memcpy_kthread, qp, + "ntb-txcpy/%s/%u", + pci_name(ndev->pdev), qp->qp_num); + if (IS_ERR(qp->tx_offload_thread)) { + dev_warn(&nt->ndev->dev, + "tx memcpy offload thread creation failed: %ld; falling back to inline copy\n", + PTR_ERR(qp->tx_offload_thread)); + qp->tx_offload_thread = NULL; + } + } else + qp->tx_offload_thread = NULL; + dma_cap_zero(dma_mask); dma_cap_set(DMA_MEMCPY, dma_mask); @@ -2138,6 +2176,11 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) qp->active = false; + if (qp->tx_offload_thread) { + kthread_stop(qp->tx_offload_thread); + qp->tx_offload_thread = NULL; + } + if (qp->tx_dma_chan) { struct dma_chan *chan = qp->tx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be @@ -2201,6 +2244,9 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); + while ((entry = ntb_list_rm(&qp->ntb_tx_offl_q_lock, &qp->tx_offl_q))) + kfree(entry); + qp->transport->qp_bitmap_free |= qp_bit; dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num); @@ -2266,7 +2312,6 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, entry->buf = data; entry->len = len; entry->flags = 0; - entry->retries = 0; entry->errors = 0; entry->rx_index = 0; @@ -2316,7 +2361,6 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, entry->len = len; entry->flags = 0; entry->errors = 0; - entry->retries = 0; entry->tx_index = 0; rc = ntb_process_tx(qp, entry); diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c index 4e18e08776c9..f52d409ba6d2 100644 --- a/drivers/ntb/test/ntb_msi_test.c +++ b/drivers/ntb/test/ntb_msi_test.c @@ -164,7 +164,7 @@ static void ntb_msit_db_event(void *ctx, int vec) if (irq_count == -1) continue; - desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC); + desc = kzalloc_objs(*desc, irq_count, GFP_ATOMIC); if (!desc) continue; diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 72bc1d017a46..dfd175f79e8f 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -839,10 +839,8 @@ static int perf_copy_chunk(struct perf_thread *pthr, dma_set_unmap(tx, unmap); ret = dma_submit_error(dmaengine_submit(tx)); - if (ret) { - dmaengine_unmap_put(unmap); + if (ret) goto err_free_resource; - } dmaengine_unmap_put(unmap); diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 8aeca7914050..1c1c74f4ff2d 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -284,8 +284,7 @@ static struct pp_ctx *pp_create_data(struct ntb_dev *ntb) pp->ntb = ntb; atomic_set(&pp->count, 0); spin_lock_init(&pp->lock); - hrtimer_init(&pp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - pp->timer.function = pp_timer_func; + hrtimer_setup(&pp->timer, pp_timer_func, CLOCK_MONOTONIC, HRTIMER_MODE_REL); return pp; } diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 641cb7e05a47..06881047f5bc 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -936,7 +936,7 @@ static ssize_t tool_peer_mw_trans_write(struct file *filep, buf[buf_size] = '\0'; - n = sscanf(buf, "%lli:%zi", &addr, &wsize); + n = sscanf(buf, "%llu:%zu", &addr, &wsize); if (n != 2) return -EINVAL; |
