summary | refs | log | tree | commit | diff
path: root/drivers/ntb
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/ntb')
-rw-r--r-- drivers/ntb/hw/amd/ntb_hw_amd.c 19
-rw-r--r-- drivers/ntb/hw/amd/ntb_hw_amd.h 1
-rw-r--r-- drivers/ntb/hw/epf/Kconfig 1
-rw-r--r-- drivers/ntb/hw/epf/ntb_hw_epf.c 118
-rw-r--r-- drivers/ntb/hw/idt/ntb_hw_idt.c 18
-rw-r--r-- drivers/ntb/hw/intel/ntb_hw_gen1.c 14
-rw-r--r-- drivers/ntb/hw/intel/ntb_hw_gen3.c 3
-rw-r--r-- drivers/ntb/hw/intel/ntb_hw_gen4.c 22
-rw-r--r-- drivers/ntb/hw/intel/ntb_hw_gen4.h 2
-rw-r--r-- drivers/ntb/hw/intel/ntb_hw_intel.h 6
-rw-r--r-- drivers/ntb/hw/mscc/ntb_hw_switchtec.c 16
-rw-r--r-- drivers/ntb/msi.c 86
-rw-r--r-- drivers/ntb/ntb_transport.c 294
-rw-r--r-- drivers/ntb/test/ntb_msi_test.c 2
-rw-r--r-- drivers/ntb/test/ntb_perf.c 4
-rw-r--r-- drivers/ntb/test/ntb_pingpong.c 3
-rw-r--r-- drivers/ntb/test/ntb_tool.c 2
17 files changed, 325 insertions, 286 deletions
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index d687e8c2cc78..1a163596ddf5 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -197,13 +197,22 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
static int amd_ntb_get_link_status(struct amd_ntb_dev *ndev)
{
- struct pci_dev *pdev = NULL;
+ struct pci_dev *pdev = ndev->ntb.pdev;
struct pci_dev *pci_swds = NULL;
struct pci_dev *pci_swus = NULL;
u32 stat;
int rc;
if (ndev->ntb.topo == NTB_TOPO_SEC) {
+ if (ndev->dev_data->is_endpoint) {
+ rc = pcie_capability_read_dword(pdev, PCI_EXP_LNKCTL, &stat);
+ if (rc)
+ return rc;
+
+ ndev->lnk_sta = stat;
+ return 0;
+ }
+
/* Locate the pointer to Downstream Switch for this device */
pci_swds = pci_upstream_bridge(ndev->ntb.pdev);
if (pci_swds) {
@@ -1311,6 +1320,11 @@ static const struct ntb_dev_data dev_data[] = {
.mw_count = 2,
.mw_idx = 2,
},
+ { /* for device 0x17d7 */
+ .mw_count = 2,
+ .mw_idx = 2,
+ .is_endpoint = true,
+ },
};
static const struct pci_device_id amd_ntb_pci_tbl[] = {
@@ -1318,6 +1332,9 @@ static const struct pci_device_id amd_ntb_pci_tbl[] = {
{ PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] },
+ { PCI_VDEVICE(AMD, 0x155a), (kernel_ulong_t)&dev_data[1] },
+ { PCI_VDEVICE(AMD, 0x17d4), (kernel_ulong_t)&dev_data[1] },
+ { PCI_VDEVICE(AMD, 0x17d7), (kernel_ulong_t)&dev_data[2] },
{ PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] },
{ 0, }
};
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
index 5f337b1572a0..e8c3165fa38b 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.h
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -168,6 +168,7 @@ enum {
struct ntb_dev_data {
const unsigned char mw_count;
const unsigned int mw_idx;
+ const bool is_endpoint;
};
struct amd_ntb_dev;
diff --git a/drivers/ntb/hw/epf/Kconfig b/drivers/ntb/hw/epf/Kconfig
index 6197d1aab344..314485574bf8 100644
--- a/drivers/ntb/hw/epf/Kconfig
+++ b/drivers/ntb/hw/epf/Kconfig
@@ -1,6 +1,5 @@
config NTB_EPF
tristate "Generic EPF Non-Transparent Bridge support"
- depends on m
help
This driver supports EPF NTB on configurable endpoint.
If unsure, say N.
diff --git a/drivers/ntb/hw/epf/ntb_hw_epf.c b/drivers/ntb/hw/epf/ntb_hw_epf.c
index 00f0e78f685b..d3ecf25a5162 100644
--- a/drivers/ntb/hw/epf/ntb_hw_epf.c
+++ b/drivers/ntb/hw/epf/ntb_hw_epf.c
@@ -49,6 +49,7 @@
#define NTB_EPF_COMMAND_TIMEOUT 1000 /* 1 Sec */
enum pci_barno {
+ NO_BAR = -1,
BAR_0,
BAR_1,
BAR_2,
@@ -57,16 +58,26 @@ enum pci_barno {
BAR_5,
};
+/*
+ * Logical roles a PCI BAR can play for this driver. The values index the
+ * per-device barno_map[] tables, whose entries are enum pci_barno numbers
+ * (NO_BAR where a device has no BAR for that role).
+ */
+enum epf_ntb_bar {
+ BAR_CONFIG, /* BAR mapped as ctrl_reg */
+ BAR_PEER_SPAD, /* BAR mapped as peer_spad_reg when it differs from BAR_CONFIG */
+ BAR_DB, /* BAR mapped as db_reg */
+ BAR_MW1, /* first memory window; window idx maps to BAR_MW1 + idx */
+ BAR_MW2,
+ BAR_MW3,
+ BAR_MW4,
+ NTB_BAR_NUM, /* number of roles; used as the size of each barno map */
+};
+
+#define NTB_EPF_MAX_MW_COUNT (NTB_BAR_NUM - BAR_MW1)
+
struct ntb_epf_dev {
struct ntb_dev ntb;
struct device *dev;
/* Mutex to protect providing commands to NTB EPF */
struct mutex cmd_lock;
- enum pci_barno ctrl_reg_bar;
- enum pci_barno peer_spad_reg_bar;
- enum pci_barno db_reg_bar;
- enum pci_barno mw_bar;
+ const enum pci_barno *barno_map;
unsigned int mw_count;
unsigned int spad_count;
@@ -85,17 +96,6 @@ struct ntb_epf_dev {
#define ntb_ndev(__ntb) container_of(__ntb, struct ntb_epf_dev, ntb)
-struct ntb_epf_data {
- /* BAR that contains both control region and self spad region */
- enum pci_barno ctrl_reg_bar;
- /* BAR that contains peer spad region */
- enum pci_barno peer_spad_reg_bar;
- /* BAR that contains Doorbell region and Memory window '1' */
- enum pci_barno db_reg_bar;
- /* BAR that contains memory windows*/
- enum pci_barno mw_bar;
-};
-
static int ntb_epf_send_command(struct ntb_epf_dev *ndev, u32 command,
u32 argument)
{
@@ -144,7 +144,7 @@ static int ntb_epf_mw_to_bar(struct ntb_epf_dev *ndev, int idx)
return -EINVAL;
}
- return idx + 2;
+ return ndev->barno_map[BAR_MW1 + idx];
}
static int ntb_epf_mw_count(struct ntb_dev *ntb, int pidx)
@@ -413,7 +413,9 @@ static int ntb_epf_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
return -EINVAL;
}
- bar = idx + ndev->mw_bar;
+ bar = ntb_epf_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
mw_size = pci_resource_len(ntb->pdev, bar);
@@ -455,7 +457,9 @@ static int ntb_epf_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
if (idx == 0)
offset = readl(ndev->ctrl_reg + NTB_EPF_MW1_OFFSET);
- bar = idx + ndev->mw_bar;
+ bar = ntb_epf_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
if (base)
*base = pci_resource_start(ndev->ntb.pdev, bar) + offset;
@@ -560,6 +564,11 @@ static int ntb_epf_init_dev(struct ntb_epf_dev *ndev)
ndev->mw_count = readl(ndev->ctrl_reg + NTB_EPF_MW_COUNT);
ndev->spad_count = readl(ndev->ctrl_reg + NTB_EPF_SPAD_COUNT);
+ if (ndev->mw_count > NTB_EPF_MAX_MW_COUNT) {
+ dev_err(dev, "Unsupported MW count: %u\n", ndev->mw_count);
+ return -EINVAL;
+ }
+
return 0;
}
@@ -596,14 +605,15 @@ static int ntb_epf_init_pci(struct ntb_epf_dev *ndev,
dev_warn(&pdev->dev, "Cannot DMA highmem\n");
}
- ndev->ctrl_reg = pci_iomap(pdev, ndev->ctrl_reg_bar, 0);
+ ndev->ctrl_reg = pci_iomap(pdev, ndev->barno_map[BAR_CONFIG], 0);
if (!ndev->ctrl_reg) {
ret = -EIO;
goto err_pci_regions;
}
- if (ndev->peer_spad_reg_bar) {
- ndev->peer_spad_reg = pci_iomap(pdev, ndev->peer_spad_reg_bar, 0);
+ if (ndev->barno_map[BAR_PEER_SPAD] != ndev->barno_map[BAR_CONFIG]) {
+ ndev->peer_spad_reg = pci_iomap(pdev,
+ ndev->barno_map[BAR_PEER_SPAD], 0);
if (!ndev->peer_spad_reg) {
ret = -EIO;
goto err_pci_regions;
@@ -614,7 +624,7 @@ static int ntb_epf_init_pci(struct ntb_epf_dev *ndev,
ndev->peer_spad_reg = ndev->ctrl_reg + spad_off + spad_sz;
}
- ndev->db_reg = pci_iomap(pdev, ndev->db_reg_bar, 0);
+ ndev->db_reg = pci_iomap(pdev, ndev->barno_map[BAR_DB], 0);
if (!ndev->db_reg) {
ret = -EIO;
goto err_pci_regions;
@@ -659,12 +669,7 @@ static void ntb_epf_cleanup_isr(struct ntb_epf_dev *ndev)
static int ntb_epf_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- enum pci_barno peer_spad_reg_bar = BAR_1;
- enum pci_barno ctrl_reg_bar = BAR_0;
- enum pci_barno db_reg_bar = BAR_2;
- enum pci_barno mw_bar = BAR_2;
struct device *dev = &pdev->dev;
- struct ntb_epf_data *data;
struct ntb_epf_dev *ndev;
int ret;
@@ -675,18 +680,10 @@ static int ntb_epf_pci_probe(struct pci_dev *pdev,
if (!ndev)
return -ENOMEM;
- data = (struct ntb_epf_data *)id->driver_data;
- if (data) {
- peer_spad_reg_bar = data->peer_spad_reg_bar;
- ctrl_reg_bar = data->ctrl_reg_bar;
- db_reg_bar = data->db_reg_bar;
- mw_bar = data->mw_bar;
- }
+ ndev->barno_map = (const enum pci_barno *)id->driver_data;
+ if (!ndev->barno_map)
+ return -EINVAL;
- ndev->peer_spad_reg_bar = peer_spad_reg_bar;
- ndev->ctrl_reg_bar = ctrl_reg_bar;
- ndev->db_reg_bar = db_reg_bar;
- ndev->mw_bar = mw_bar;
ndev->dev = dev;
ntb_epf_init_struct(ndev, pdev);
@@ -730,30 +727,51 @@ static void ntb_epf_pci_remove(struct pci_dev *pdev)
ntb_epf_deinit_pci(ndev);
}
-static const struct ntb_epf_data j721e_data = {
- .ctrl_reg_bar = BAR_0,
- .peer_spad_reg_bar = BAR_1,
- .db_reg_bar = BAR_2,
- .mw_bar = BAR_2,
+static const enum pci_barno j721e_map[NTB_BAR_NUM] = {
+ [BAR_CONFIG] = BAR_0,
+ [BAR_PEER_SPAD] = BAR_1,
+ [BAR_DB] = BAR_2,
+ [BAR_MW1] = BAR_2,
+ [BAR_MW2] = BAR_3,
+ [BAR_MW3] = BAR_4,
+ [BAR_MW4] = BAR_5
};
-static const struct ntb_epf_data mx8_data = {
- .ctrl_reg_bar = BAR_0,
- .peer_spad_reg_bar = BAR_0,
- .db_reg_bar = BAR_2,
- .mw_bar = BAR_4,
+static const enum pci_barno mx8_map[NTB_BAR_NUM] = {
+ [BAR_CONFIG] = BAR_0,
+ [BAR_PEER_SPAD] = BAR_0,
+ [BAR_DB] = BAR_2,
+ [BAR_MW1] = BAR_4,
+ [BAR_MW2] = BAR_5,
+ [BAR_MW3] = NO_BAR,
+ [BAR_MW4] = NO_BAR
+};
+
+/*
+ * BAR layout for the Renesas device (0x0030) matched in ntb_epf_pci_tbl.
+ * Config and peer scratchpad share BAR_0, so ntb_epf_init_pci() does not
+ * map a separate peer-scratchpad BAR. Only the first memory window has a
+ * BAR; the remaining window slots are NO_BAR.
+ */
+static const enum pci_barno rcar_barno[NTB_BAR_NUM] = {
+ [BAR_CONFIG] = BAR_0,
+ [BAR_PEER_SPAD] = BAR_0,
+ [BAR_DB] = BAR_4,
+ [BAR_MW1] = BAR_2,
+ [BAR_MW2] = NO_BAR,
+ [BAR_MW3] = NO_BAR,
+ [BAR_MW4] = NO_BAR,
};
static const struct pci_device_id ntb_epf_pci_tbl[] = {
{
PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E),
.class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
- .driver_data = (kernel_ulong_t)&j721e_data,
+ .driver_data = (kernel_ulong_t)j721e_map,
},
{
PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x0809),
.class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
- .driver_data = (kernel_ulong_t)&mx8_data,
+ .driver_data = (kernel_ulong_t)mx8_map,
+ },
+ {
+ PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0030),
+ .class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
+ .driver_data = (kernel_ulong_t)rcar_barno,
},
{ },
};
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index 544d8a4d2af5..f27df8d7f3b9 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -1041,7 +1041,7 @@ static inline char *idt_get_mw_name(enum idt_mw_type mw_type)
static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
unsigned char *mw_cnt)
{
- struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws;
+ struct idt_mw_cfg *mws;
const struct idt_ntb_bar *bars;
enum idt_mw_type mw_type;
unsigned char widx, bidx, en_cnt;
@@ -1049,6 +1049,11 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
int aprt_size;
u32 data;
+ mws = devm_kcalloc(&ndev->ntb.pdev->dev, IDT_MAX_NR_MWS,
+ sizeof(*mws), GFP_KERNEL);
+ if (!mws)
+ return ERR_PTR(-ENOMEM);
+
/* Retrieve the array of the BARs registers */
bars = portdata_tbl[port].bars;
@@ -1103,16 +1108,7 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
}
}
- /* Allocate memory for memory window descriptors */
- ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws),
- GFP_KERNEL);
- if (!ret_mws)
- return ERR_PTR(-ENOMEM);
-
- /* Copy the info of detected memory windows */
- memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws));
-
- return ret_mws;
+ return mws;
}
/*
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
index 079b8cd79785..944d10b48ae4 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
@@ -763,7 +763,8 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
return ndev_ntb_debugfs_read(filp, ubuf, count, offp);
else if (pdev_is_gen3(ndev->ntb.pdev))
return ndev_ntb3_debugfs_read(filp, ubuf, count, offp);
- else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev))
+ else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev) ||
+ pdev_is_gen6(ndev->ntb.pdev))
return ndev_ntb4_debugfs_read(filp, ubuf, count, offp);
return -ENXIO;
@@ -1872,7 +1873,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev,
rc = gen3_init_dev(ndev);
if (rc)
goto err_init_dev;
- } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) {
+ } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev) ||
+ pdev_is_gen6(pdev)) {
ndev->ntb.ops = &intel_ntb4_ops;
rc = intel_ntb_init_pci(ndev, pdev);
if (rc)
@@ -1903,7 +1905,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev,
err_register:
ndev_deinit_debugfs(ndev);
if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) ||
- pdev_is_gen4(pdev) || pdev_is_gen5(pdev))
+ pdev_is_gen4(pdev) || pdev_is_gen5(pdev) ||
+ pdev_is_gen6(pdev))
xeon_deinit_dev(ndev);
err_init_dev:
intel_ntb_deinit_pci(ndev);
@@ -1920,7 +1923,8 @@ static void intel_ntb_pci_remove(struct pci_dev *pdev)
ntb_unregister_device(&ndev->ntb);
ndev_deinit_debugfs(ndev);
if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) ||
- pdev_is_gen4(pdev) || pdev_is_gen5(pdev))
+ pdev_is_gen4(pdev) || pdev_is_gen5(pdev) ||
+ pdev_is_gen6(pdev))
xeon_deinit_dev(ndev);
intel_ntb_deinit_pci(ndev);
kfree(ndev);
@@ -2049,6 +2053,8 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_ICX)},
/* GEN5 PCIe */
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_GNR)},
+ /* GEN6 PCIe */
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_DMR)},
{0}
};
MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
index ffcfc3e02c35..a5aa96a31f4a 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen3.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -215,6 +215,9 @@ static int gen3_init_ntb(struct intel_ntb_dev *ndev)
}
ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+ /* Make sure we are not using DB's used for link status */
+ if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD)
+ ndev->db_valid_mask &= ~ndev->db_link_mask;
ndev->reg->db_iowrite(ndev->db_valid_mask,
ndev->self_mmio +
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c
index 22cac7975b3c..a221a978a9d5 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen4.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c
@@ -46,6 +46,16 @@ static const struct intel_ntb_alt_reg gen4_b2b_reg = {
.spad = GEN4_EM_SPAD_OFFSET,
};
+/*
+ * Pick the PPD0 register offset for this device generation: gen4 and gen5
+ * use GEN4_PPD0_OFFSET, gen6 uses GEN6_PPD0_OFFSET. Any other device
+ * yields ULLONG_MAX.
+ *
+ * NOTE(review): intel_ntb4_link_enable() adds the returned value to
+ * self_mmio without checking for ULLONG_MAX -- confirm that only
+ * gen4/gen5/gen6 devices can reach that path.
+ */
+static u64 get_ppd0(struct pci_dev *pdev)
+{
+ if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev))
+ return GEN4_PPD0_OFFSET;
+ else if (pdev_is_gen6(pdev))
+ return GEN6_PPD0_OFFSET;
+
+ return ULLONG_MAX;
+}
+
static int gen4_poll_link(struct intel_ntb_dev *ndev)
{
u16 reg_val;
@@ -183,7 +193,7 @@ static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
int gen4_init_dev(struct intel_ntb_dev *ndev)
{
struct pci_dev *pdev = ndev->ntb.pdev;
- u32 ppd1/*, ppd0*/;
+ u32 ppd1;
u16 lnkctl;
int rc;
@@ -197,7 +207,7 @@ int gen4_init_dev(struct intel_ntb_dev *ndev)
ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET);
if (pdev_is_ICX(pdev))
ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1);
- else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev))
+ else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev) || pdev_is_gen6(pdev))
ndev->ntb.topo = spr_ppd_topo(ndev, ppd1);
dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1,
ntb_topo_string(ndev->ntb.topo));
@@ -432,10 +442,12 @@ static int intel_ntb4_link_enable(struct ntb_dev *ntb,
enum ntb_speed max_speed, enum ntb_width max_width)
{
struct intel_ntb_dev *ndev;
+ struct pci_dev *pdev;
u32 ntb_ctl, ppd0;
u16 lnkctl;
ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+ pdev = ntb->pdev;
dev_dbg(&ntb->pdev->dev,
"Enabling link with max_speed %d max_width %d\n",
@@ -476,12 +488,12 @@ static int intel_ntb4_link_enable(struct ntb_dev *ntb,
iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET);
/* start link training in PPD0 */
- ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET);
+ ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev));
ppd0 |= GEN4_PPD_LINKTRN;
- iowrite32(ppd0, ndev->self_mmio + GEN4_PPD0_OFFSET);
+ iowrite32(ppd0, ndev->self_mmio + get_ppd0(pdev));
/* make sure link training has started */
- ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET);
+ ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev));
if (!(ppd0 & GEN4_PPD_LINKTRN)) {
dev_warn(&ntb->pdev->dev, "Link is not training\n");
return -ENXIO;
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h
index f91323eaf5ce..1ba8203d6352 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen4.h
+++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h
@@ -103,6 +103,8 @@
#define NTB_LTR_IDLE_LATSCALE 0x0800 /* 1us scale */
#define NTB_LTR_IDLE_REQMNT 0x8000 /* snoop req enable */
+#define GEN6_PPD0_OFFSET 0xf0d4
+
ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *offp);
int gen4_init_dev(struct intel_ntb_dev *ndev);
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index da4d5fe55bab..0a3a3677f82a 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -71,6 +71,7 @@
#define PCI_DEVICE_ID_INTEL_NTB_B2B_SKX 0x201C
#define PCI_DEVICE_ID_INTEL_NTB_B2B_ICX 0x347e
#define PCI_DEVICE_ID_INTEL_NTB_B2B_GNR 0x0db4
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_DMR 0x7868
/* Ntb control and link status */
#define NTB_CTL_CFG_LOCK BIT(0)
@@ -235,4 +236,9 @@ static inline int pdev_is_gen5(struct pci_dev *pdev)
return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_GNR;
}
+/* Non-zero when the PCI device ID is the DMR B2B NTB, i.e. a gen6 device. */
+static inline int pdev_is_gen6(struct pci_dev *pdev)
+{
+ return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_DMR;
+}
+
#endif
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index ad1786be2554..e38540b92716 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -29,7 +29,7 @@ MODULE_PARM_DESC(use_lut_mws,
"Enable the use of the LUT based memory windows");
#define SWITCHTEC_NTB_MAGIC 0x45CC0001
-#define MAX_MWS 128
+#define MAX_MWS 256
struct shared_mw {
u32 magic;
@@ -288,7 +288,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
if (size != 0 && xlate_pos < 12)
return -EINVAL;
- if (!IS_ALIGNED(addr, BIT_ULL(xlate_pos))) {
+ if (xlate_pos >= 0 && !IS_ALIGNED(addr, BIT_ULL(xlate_pos))) {
/*
* In certain circumstances we can get a buffer that is
* not aligned to its size. (Most of the time
@@ -1202,7 +1202,8 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev)
sndev->mmio_self_ctrl);
sndev->nr_lut_mw = ioread16(&sndev->mmio_self_ctrl->lut_table_entries);
- sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw);
+ if (sndev->nr_lut_mw)
+ sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw);
dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut\n",
sndev->nr_direct_mw, sndev->nr_lut_mw);
@@ -1212,7 +1213,8 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev)
sndev->peer_nr_lut_mw =
ioread16(&sndev->mmio_peer_ctrl->lut_table_entries);
- sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw);
+ if (sndev->peer_nr_lut_mw)
+ sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw);
dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut\n",
sndev->peer_nr_direct_mw, sndev->peer_nr_lut_mw);
@@ -1314,6 +1316,12 @@ static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev)
for (i = 0; i < sndev->nr_lut_mw; i++) {
int idx = sndev->nr_direct_mw + i;
+ /*
+ * idx indexes self_shared->mw_sizes[] below; stop at the first
+ * index that reaches MAX_MWS instead of writing that entry.
+ */
+ if (idx >= MAX_MWS) {
+ dev_err(&sndev->stdev->dev,
+ "Total number of MW cannot be bigger than %d\n", MAX_MWS);
+ break;
+ }
+
sndev->self_shared->mw_sizes[idx] = LUT_SIZE;
}
}
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
index 6295e55ef85e..6817d504c12a 100644
--- a/drivers/ntb/msi.c
+++ b/drivers/ntb/msi.c
@@ -106,10 +106,10 @@ int ntb_msi_setup_mws(struct ntb_dev *ntb)
if (!ntb->msi)
return -EINVAL;
- msi_lock_descs(&ntb->pdev->dev);
- desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED);
- addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
- msi_unlock_descs(&ntb->pdev->dev);
+ scoped_guard (msi_descs_lock, &ntb->pdev->dev) {
+ desc = msi_first_desc(&ntb->pdev->dev, MSI_DESC_ASSOCIATED);
+ addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+ }
for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
@@ -289,7 +289,7 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
if (!ntb->msi)
return -EINVAL;
- msi_lock_descs(dev);
+ guard(msi_descs_lock)(dev);
msi_for_each_desc(entry, dev, MSI_DESC_ASSOCIATED) {
if (irq_has_action(entry->irq))
continue;
@@ -307,51 +307,14 @@ int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
if (ret) {
devm_free_irq(&ntb->dev, entry->irq, dev_id);
- goto unlock;
+ return ret;
}
-
- ret = entry->irq;
- goto unlock;
+ return entry->irq;
}
- ret = -ENODEV;
-
-unlock:
- msi_unlock_descs(dev);
- return ret;
+ return -ENODEV;
}
EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
-static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
-{
- struct ntb_dev *ntb = dev_ntb(dev);
- struct ntb_msi_devres *dr = res;
-
- return dr->ntb == ntb && dr->entry == data;
-}
-
-/**
- * ntbm_msi_free_irq() - free an interrupt
- * @ntb: NTB device context
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * This function should be used to manually free IRQs allocated with
- * ntbm_request_[threaded_]irq().
- */
-void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
-{
- struct msi_desc *entry = irq_get_msi_desc(irq);
-
- entry->write_msi_msg = NULL;
- entry->write_msi_msg_data = NULL;
-
- WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
- ntbm_msi_callback_match, entry));
-
- devm_free_irq(&ntb->dev, irq, dev_id);
-}
-EXPORT_SYMBOL(ntbm_msi_free_irq);
-
/**
* ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
* @ntb: NTB device context
@@ -379,36 +342,3 @@ int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
return 0;
}
EXPORT_SYMBOL(ntb_msi_peer_trigger);
-
-/**
- * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
- * @ntb: NTB device context
- * @peer: Peer index
- * @desc: MSI descriptor data which triggers the interrupt
- * @msi_addr: Physical address to trigger the interrupt
- *
- * This function allows using DMA engines to trigger an interrupt
- * (for example, trigger an interrupt to process the data after
- * sending it). To trigger the interrupt, write @desc.data to the address
- * returned in @msi_addr
- *
- * Return: Zero on success, otherwise a negative error number.
- */
-int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
- struct ntb_msi_desc *desc,
- phys_addr_t *msi_addr)
-{
- int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer;
- phys_addr_t mw_phys_addr;
- int ret;
-
- ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL);
- if (ret)
- return ret;
-
- if (msi_addr)
- *msi_addr = mw_phys_addr + desc->addr_offset;
-
- return 0;
-}
-EXPORT_SYMBOL(ntb_msi_peer_addr);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index a22ea4a4b202..7cabc82305d6 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -54,11 +54,15 @@
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/interrupt.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/seq_file.h>
#include <linux/types.h>
#include <linux/uaccess.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
#include "linux/ntb.h"
#include "linux/ntb_transport.h"
@@ -99,6 +103,10 @@ module_param(use_msi, bool, 0644);
MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
#endif
+static bool tx_memcpy_offload;
+module_param(tx_memcpy_offload, bool, 0644);
+MODULE_PARM_DESC(tx_memcpy_offload, "Offload TX memcpy_toio() to a kernel thread");
+
static struct dentry *nt_debugfs_dir;
/* Only two-ports NTB devices are supported */
@@ -112,7 +120,6 @@ struct ntb_queue_entry {
void *buf;
unsigned int len;
unsigned int flags;
- int retries;
int errors;
unsigned int tx_index;
unsigned int rx_index;
@@ -148,7 +155,9 @@ struct ntb_transport_qp {
void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
struct list_head tx_free_q;
+ struct list_head tx_offl_q;
spinlock_t ntb_tx_free_q_lock;
+ spinlock_t ntb_tx_offl_q_lock;
void __iomem *tx_mw;
phys_addr_t tx_mw_phys;
size_t tx_mw_size;
@@ -199,6 +208,9 @@ struct ntb_transport_qp {
int msi_irq;
struct ntb_msi_desc msi_desc;
struct ntb_msi_desc peer_msi_desc;
+
+ struct task_struct *tx_offload_thread;
+ wait_queue_head_t tx_offload_wq;
};
struct ntb_transport_mw {
@@ -241,6 +253,9 @@ struct ntb_transport_ctx {
struct work_struct link_cleanup;
struct dentry *debugfs_node_dir;
+
+ /* Make sure workq of link event be executed serially */
+ struct mutex link_event_lock;
};
enum {
@@ -281,8 +296,14 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset);
static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset);
static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset);
+static int ntb_tx_memcpy_kthread(void *data);
+/*
+ * TX memcpy offload is in effect for @qp only when the tx_memcpy_offload
+ * module parameter is set, @qp is non-NULL and @qp has a
+ * tx_offload_thread.
+ */
+static inline bool ntb_tx_offload_enabled(struct ntb_transport_qp *qp)
+{
+ return tx_memcpy_offload && qp && qp->tx_offload_thread;
+}
+
static int ntb_transport_bus_match(struct device *dev,
const struct device_driver *drv)
{
@@ -462,104 +483,49 @@ void ntb_transport_unregister_client(struct ntb_transport_client *drv)
}
EXPORT_SYMBOL_GPL(ntb_transport_unregister_client);
-static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
- loff_t *offp)
+static int ntb_qp_debugfs_stats_show(struct seq_file *s, void *v)
{
- struct ntb_transport_qp *qp;
- char *buf;
- ssize_t ret, out_offset, out_count;
-
- qp = filp->private_data;
+ struct ntb_transport_qp *qp = s->private;
if (!qp || !qp->link_is_up)
return 0;
- out_count = 1000;
-
- buf = kmalloc(out_count, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ seq_puts(s, "\nNTB QP stats:\n\n");
+
+ seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
+ seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
+ seq_printf(s, "rx_memcpy - \t%llu\n", qp->rx_memcpy);
+ seq_printf(s, "rx_async - \t%llu\n", qp->rx_async);
+ seq_printf(s, "rx_ring_empty - %llu\n", qp->rx_ring_empty);
+ seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
+ seq_printf(s, "rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
+ seq_printf(s, "rx_err_ver - \t%llu\n", qp->rx_err_ver);
+ seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
+ seq_printf(s, "rx_index - \t%u\n", qp->rx_index);
+ seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
+ seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
+
+ seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
+ seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
+ seq_printf(s, "tx_memcpy - \t%llu\n", qp->tx_memcpy);
+ seq_printf(s, "tx_async - \t%llu\n", qp->tx_async);
+ seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
+ seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
+ seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
+ seq_printf(s, "tx_index (H) - \t%u\n", qp->tx_index);
+ seq_printf(s, "RRI (T) - \t%u\n", qp->remote_rx_info->entry);
+ seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
+ seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
+ seq_putc(s, '\n');
+
+ seq_printf(s, "Using TX DMA - \t%s\n", qp->tx_dma_chan ? "Yes" : "No");
+ seq_printf(s, "Using RX DMA - \t%s\n", qp->rx_dma_chan ? "Yes" : "No");
+ seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
+ seq_putc(s, '\n');
- out_offset = 0;
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "\nNTB QP stats:\n\n");
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_bytes - \t%llu\n", qp->rx_bytes);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_pkts - \t%llu\n", qp->rx_pkts);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_memcpy - \t%llu\n", qp->rx_memcpy);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_async - \t%llu\n", qp->rx_async);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_ring_empty - %llu\n", qp->rx_ring_empty);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_err_ver - \t%llu\n", qp->rx_err_ver);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_buff - \t0x%p\n", qp->rx_buff);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_index - \t%u\n", qp->rx_index);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_max_entry - \t%u\n", qp->rx_max_entry);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
-
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_bytes - \t%llu\n", qp->tx_bytes);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_pkts - \t%llu\n", qp->tx_pkts);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_memcpy - \t%llu\n", qp->tx_memcpy);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_async - \t%llu\n", qp->tx_async);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_ring_full - \t%llu\n", qp->tx_ring_full);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_mw - \t0x%p\n", qp->tx_mw);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_index (H) - \t%u\n", qp->tx_index);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "RRI (T) - \t%u\n",
- qp->remote_rx_info->entry);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "tx_max_entry - \t%u\n", qp->tx_max_entry);
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "free tx - \t%u\n",
- ntb_transport_tx_free_entry(qp));
-
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "\n");
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "Using TX DMA - \t%s\n",
- qp->tx_dma_chan ? "Yes" : "No");
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "Using RX DMA - \t%s\n",
- qp->rx_dma_chan ? "Yes" : "No");
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "QP Link - \t%s\n",
- qp->link_is_up ? "Up" : "Down");
- out_offset += scnprintf(buf + out_offset, out_count - out_offset,
- "\n");
-
- if (out_offset > out_count)
- out_offset = out_count;
-
- ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
- kfree(buf);
- return ret;
-}
-
-static const struct file_operations ntb_qp_debugfs_stats = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = debugfs_read,
-};
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ntb_qp_debugfs_stats);
static void ntb_list_add(spinlock_t *lock, struct list_head *entry,
struct list_head *list)
@@ -793,13 +759,13 @@ static void ntb_transport_msi_desc_changed(void *data)
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
- struct pci_dev *pdev = nt->ndev->pdev;
+ struct device *dma_dev = ntb_get_dma_dev(nt->ndev);
if (!mw->virt_addr)
return;
ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
- dma_free_coherent(&pdev->dev, mw->alloc_size,
+ dma_free_coherent(dma_dev, mw->alloc_size,
mw->alloc_addr, mw->dma_addr);
mw->xlat_size = 0;
mw->buff_size = 0;
@@ -869,7 +835,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
resource_size_t size)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
- struct pci_dev *pdev = nt->ndev->pdev;
+ struct device *dma_dev = ntb_get_dma_dev(nt->ndev);
size_t xlat_size, buff_size;
resource_size_t xlat_align;
resource_size_t xlat_align_size;
@@ -898,12 +864,12 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
mw->buff_size = buff_size;
mw->alloc_size = buff_size;
- rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+ rc = ntb_alloc_mw_buffer(mw, dma_dev, xlat_align);
if (rc) {
mw->alloc_size *= 2;
- rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+ rc = ntb_alloc_mw_buffer(mw, dma_dev, xlat_align);
if (rc) {
- dev_err(&pdev->dev,
+ dev_err(dma_dev,
"Unable to alloc aligned MW buff\n");
mw->xlat_size = 0;
mw->buff_size = 0;
@@ -916,7 +882,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
mw->xlat_size);
if (rc) {
- dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
+ dev_err(dma_dev, "Unable to set mw%d translation", num_mw);
ntb_free_mw(nt, num_mw);
return -EIO;
}
@@ -1024,6 +990,7 @@ static void ntb_transport_link_cleanup_work(struct work_struct *work)
struct ntb_transport_ctx *nt =
container_of(work, struct ntb_transport_ctx, link_cleanup);
+ guard(mutex)(&nt->link_event_lock);
ntb_transport_link_cleanup(nt);
}
@@ -1047,6 +1014,8 @@ static void ntb_transport_link_work(struct work_struct *work)
u32 val;
int rc = 0, i, spad;
+ guard(mutex)(&nt->link_event_lock);
+
/* send the local info, in the opposite order of the way we read it */
if (nt->use_msi) {
@@ -1229,15 +1198,15 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
qp->tx_max_entry = tx_size / qp->tx_max_frame;
if (nt->debugfs_node_dir) {
- char debugfs_name[4];
+ char debugfs_name[8];
- snprintf(debugfs_name, 4, "qp%d", qp_num);
+ snprintf(debugfs_name, sizeof(debugfs_name), "qp%d", qp_num);
qp->debugfs_dir = debugfs_create_dir(debugfs_name,
nt->debugfs_node_dir);
qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR,
qp->debugfs_dir, qp,
- &ntb_qp_debugfs_stats);
+ &ntb_qp_debugfs_stats_fops);
} else {
qp->debugfs_dir = NULL;
qp->debugfs_stats = NULL;
@@ -1248,11 +1217,13 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
spin_lock_init(&qp->ntb_rx_q_lock);
spin_lock_init(&qp->ntb_tx_free_q_lock);
+ spin_lock_init(&qp->ntb_tx_offl_q_lock);
INIT_LIST_HEAD(&qp->rx_post_q);
INIT_LIST_HEAD(&qp->rx_pend_q);
INIT_LIST_HEAD(&qp->rx_free_q);
INIT_LIST_HEAD(&qp->tx_free_q);
+ INIT_LIST_HEAD(&qp->tx_offl_q);
tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db,
(unsigned long)qp);
@@ -1353,7 +1324,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
qp_count = ilog2(qp_bitmap);
if (nt->use_msi) {
qp_count -= 1;
- nt->msi_db_mask = 1 << qp_count;
+ nt->msi_db_mask = BIT_ULL(qp_count);
ntb_db_clear_mask(ndev, nt->msi_db_mask);
}
@@ -1387,6 +1358,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
goto err2;
}
+ mutex_init(&nt->link_event_lock);
INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work);
@@ -1563,15 +1535,15 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
goto err;
unmap->len = len;
- unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
- pay_off, len, DMA_TO_DEVICE);
+ unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(offset),
+ len, DMA_TO_DEVICE, 0);
if (dma_mapping_error(device->dev, unmap->addr[0]))
goto err_get_unmap;
unmap->to_cnt = 1;
- unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
- buff_off, len, DMA_FROM_DEVICE);
+ unmap->addr[1] = dma_map_phys(device->dev, virt_to_phys(buf),
+ len, DMA_FROM_DEVICE, 0);
if (dma_mapping_error(device->dev, unmap->addr[1]))
goto err_get_unmap;
@@ -1623,9 +1595,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
if (res < 0)
goto err;
- if (!entry->retries)
- qp->rx_async++;
-
+ qp->rx_async++;
return;
err:
@@ -1780,6 +1750,13 @@ static void ntb_tx_copy_callback(void *data,
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
+ /*
+ * Make DONE flag visible before DB/MSI. WC + posted MWr may reorder
+ * across iATU/bridge (platform-dependent). Order and flush here.
+ */
+ dma_mb();
+ ioread32(&hdr->flags);
+
if (qp->use_msi)
ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
else
@@ -1800,14 +1777,15 @@ static void ntb_tx_copy_callback(void *data,
ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
}
-static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
+static void ntb_memcpy_tx_on_stack(struct ntb_queue_entry *entry, void __iomem *offset)
{
-#ifdef ARCH_HAS_NOCACHE_UACCESS
+#ifdef copy_to_nontemporal
/*
* Using non-temporal mov to improve performance on non-cached
- * writes, even though we aren't actually copying from user space.
+ * writes. This only works if __iomem is strictly memory-like,
+ * but that is the case on x86-64
*/
- __copy_from_user_inatomic_nocache(offset, entry->buf, entry->len);
+ copy_to_nontemporal(offset, entry->buf, entry->len);
#else
memcpy_toio(offset, entry->buf, entry->len);
#endif
@@ -1818,6 +1796,54 @@ static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
ntb_tx_copy_callback(entry, NULL);
}
+/*
+ * ntb_tx_memcpy_kthread() - thread body that performs queued TX CPU copies.
+ * @data: the struct ntb_transport_qp whose tx_offl_q list is serviced.
+ *
+ * Runs until kthread_should_stop() reports true.  Each round moves everything
+ * currently on qp->tx_offl_q onto a private list while holding
+ * ntb_tx_offl_q_lock, then copies the entries one by one with the lock
+ * released.
+ *
+ * Return: always 0.
+ */
+static int ntb_tx_memcpy_kthread(void *data)
+{
+	struct ntb_transport_qp *qp = data;
+	const int resched_nr = 64;
+	struct ntb_queue_entry *cur;
+	LIST_HEAD(batch);
+	void __iomem *dst;
+	int copied = 0;
+
+	while (!kthread_should_stop()) {
+		/*
+		 * Wait for a stop request, a non-empty offload list, or the
+		 * 5*HZ timeout, whichever comes first.
+		 */
+		spin_lock_irq(&qp->ntb_tx_offl_q_lock);
+		wait_event_interruptible_lock_irq_timeout(qp->tx_offload_wq,
+				kthread_should_stop() ||
+				!list_empty(&qp->tx_offl_q),
+				qp->ntb_tx_offl_q_lock, 5*HZ);
+		/* Grab the whole backlog at once; tx_offl_q is left empty. */
+		list_splice_tail_init(&qp->tx_offl_q, &batch);
+		spin_unlock_irq(&qp->ntb_tx_offl_q_lock);
+
+		while (!list_empty(&batch)) {
+			cur = list_first_entry(&batch, struct ntb_queue_entry,
+					       entry);
+			list_del(&cur->entry);
+			/* Slot address is derived from the entry's tx_index. */
+			dst = qp->tx_mw + qp->tx_max_frame * cur->tx_index;
+			ntb_memcpy_tx_on_stack(cur, dst);
+
+			/* Yield the CPU after every resched_nr copies. */
+			copied++;
+			if (copied >= resched_nr) {
+				cond_resched();
+				copied = 0;
+			}
+		}
+		cond_resched();
+	}
+
+	return 0;
+}
+
+/*
+ * ntb_memcpy_tx() - CPU-copy path for one TX entry.
+ * @entry: queue entry to transmit; entry->qp selects the queue pair.
+ * @offset: destination in the TX memory window, used by the inline copy.
+ *
+ * When ntb_tx_offload_enabled() is true for the queue pair, the entry is
+ * appended to qp->tx_offl_q and tx_offload_wq is woken; @offset is not used
+ * on that path.  Otherwise the copy is done immediately through
+ * ntb_memcpy_tx_on_stack().
+ */
+static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
+{
+	struct ntb_transport_qp *qp = entry->qp;
+
+	/* An entry without a queue pair is neither copied nor queued. */
+	if (WARN_ON_ONCE(!qp))
+		return;
+
+	if (ntb_tx_offload_enabled(qp)) {
+		ntb_list_add(&qp->ntb_tx_offl_q_lock, &entry->entry,
+			     &qp->tx_offl_q);
+		wake_up(&qp->tx_offload_wq);
+	} else {
+		ntb_memcpy_tx_on_stack(entry, offset);
+	}
+}
+
static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
struct ntb_queue_entry *entry)
{
@@ -1844,8 +1870,8 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
goto err;
unmap->len = len;
- unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
- buff_off, len, DMA_TO_DEVICE);
+ unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(buf),
+ len, DMA_TO_DEVICE, 0);
if (dma_mapping_error(device->dev, unmap->addr[0]))
goto err_get_unmap;
@@ -1890,6 +1916,9 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
entry->tx_hdr = hdr;
+ WARN_ON_ONCE(!ntb_transport_tx_free_entry(qp));
+ WRITE_ONCE(qp->tx_index, (qp->tx_index + 1) % qp->tx_max_entry);
+
iowrite32(entry->len, &hdr->len);
iowrite32((u32)qp->tx_pkts, &hdr->ver);
@@ -1903,9 +1932,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
if (res < 0)
goto err;
- if (!entry->retries)
- qp->tx_async++;
-
+ qp->tx_async++;
return;
err:
@@ -1932,9 +1959,6 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
ntb_async_tx(qp, entry);
- qp->tx_index++;
- qp->tx_index %= qp->tx_max_entry;
-
qp->tx_pkts++;
return 0;
@@ -2031,6 +2055,20 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->tx_handler = handlers->tx_handler;
qp->event_handler = handlers->event_handler;
+ init_waitqueue_head(&qp->tx_offload_wq);
+ if (tx_memcpy_offload) {
+ qp->tx_offload_thread = kthread_run(ntb_tx_memcpy_kthread, qp,
+ "ntb-txcpy/%s/%u",
+ pci_name(ndev->pdev), qp->qp_num);
+ if (IS_ERR(qp->tx_offload_thread)) {
+ dev_warn(&nt->ndev->dev,
+ "tx memcpy offload thread creation failed: %ld; falling back to inline copy\n",
+ PTR_ERR(qp->tx_offload_thread));
+ qp->tx_offload_thread = NULL;
+ }
+ } else
+ qp->tx_offload_thread = NULL;
+
dma_cap_zero(dma_mask);
dma_cap_set(DMA_MEMCPY, dma_mask);
@@ -2138,6 +2176,11 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
qp->active = false;
+ if (qp->tx_offload_thread) {
+ kthread_stop(qp->tx_offload_thread);
+ qp->tx_offload_thread = NULL;
+ }
+
if (qp->tx_dma_chan) {
struct dma_chan *chan = qp->tx_dma_chan;
/* Putting the dma_chan to NULL will force any new traffic to be
@@ -2201,6 +2244,9 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry);
+ while ((entry = ntb_list_rm(&qp->ntb_tx_offl_q_lock, &qp->tx_offl_q)))
+ kfree(entry);
+
qp->transport->qp_bitmap_free |= qp_bit;
dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num);
@@ -2266,7 +2312,6 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
entry->buf = data;
entry->len = len;
entry->flags = 0;
- entry->retries = 0;
entry->errors = 0;
entry->rx_index = 0;
@@ -2316,7 +2361,6 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
entry->len = len;
entry->flags = 0;
entry->errors = 0;
- entry->retries = 0;
entry->tx_index = 0;
rc = ntb_process_tx(qp, entry);
diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c
index 4e18e08776c9..f52d409ba6d2 100644
--- a/drivers/ntb/test/ntb_msi_test.c
+++ b/drivers/ntb/test/ntb_msi_test.c
@@ -164,7 +164,7 @@ static void ntb_msit_db_event(void *ctx, int vec)
if (irq_count == -1)
continue;
- desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC);
+ desc = kzalloc_objs(*desc, irq_count, GFP_ATOMIC);
if (!desc)
continue;
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 72bc1d017a46..dfd175f79e8f 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -839,10 +839,8 @@ static int perf_copy_chunk(struct perf_thread *pthr,
dma_set_unmap(tx, unmap);
ret = dma_submit_error(dmaengine_submit(tx));
- if (ret) {
- dmaengine_unmap_put(unmap);
+ if (ret)
goto err_free_resource;
- }
dmaengine_unmap_put(unmap);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 8aeca7914050..1c1c74f4ff2d 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -284,8 +284,7 @@ static struct pp_ctx *pp_create_data(struct ntb_dev *ntb)
pp->ntb = ntb;
atomic_set(&pp->count, 0);
spin_lock_init(&pp->lock);
- hrtimer_init(&pp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- pp->timer.function = pp_timer_func;
+ hrtimer_setup(&pp->timer, pp_timer_func, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
return pp;
}
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 641cb7e05a47..06881047f5bc 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -936,7 +936,7 @@ static ssize_t tool_peer_mw_trans_write(struct file *filep,
buf[buf_size] = '\0';
- n = sscanf(buf, "%lli:%zi", &addr, &wsize);
+ n = sscanf(buf, "%llu:%zu", &addr, &wsize);
if (n != 2)
return -EINVAL;