diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-05 23:56:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-05 23:56:11 -0400 |
commit | 32199ec3cf8db2de1709cec9339844555b55c16e (patch) | |
tree | b2c6d465919cd114b9bdafafbf66d44e7bb0e35e /drivers | |
parent | a02040d8d5d533773f98e02e1a8e56db5fa7a363 (diff) | |
parent | 95f1464f695055c72de6044d7c8a2a7a1e0c7ea2 (diff) | |
download | lwn-32199ec3cf8db2de1709cec9339844555b55c16e.tar.gz lwn-32199ec3cf8db2de1709cec9339844555b55c16e.zip |
Merge tag 'ntb-4.8' of git://github.com/jonmason/ntb
Pull NTB updates from Jon Mason:
"NTB bug fixes for the ntb_tool and ntb_perf, and improvements to the
ntb_perf and ntb_pingpong for increased debuggability.
Also, modification to the ntb_transport layer to increase/decrease
the number of transport entries depending on the ring size"
* tag 'ntb-4.8' of git://github.com/jonmason/ntb:
NTB: ntb_hw_intel: use local variable pdev
NTB: ntb_hw_intel: show BAR size in debugfs info
ntb_test: Add a selftest script for the NTB subsystem
ntb_perf: clear link_is_up flag when the link goes down.
ntb_pingpong: Add a debugfs file to get the ping count
ntb_tool: Add link status and files to debugfs
ntb_tool: Postpone memory window initialization for the user
ntb_perf: Wait for link before running test
ntb_perf: Return results by reading the run file
ntb_perf: Improve thread handling to increase robustness
ntb_perf: Schedule based on time not on performance
ntb_transport: Check the number of spads the hardware supports
ntb_tool: Add memory window debug support
ntb_perf: Allow limiting the size of the memory windows
NTB: allocate number transport entries depending on size of ring size
ntb_tool: BUG: Ensure the buffer size is large enough to return all spads
ntb_tool: Fix infinite loop bug when writing spad/peer_spad file
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_intel.c | 49 | ||||
-rw-r--r-- | drivers/ntb/ntb_transport.c | 38 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_perf.c | 240 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_pingpong.c | 62 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_tool.c | 459 |
5 files changed, 749 insertions, 99 deletions
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 40d04ef5da9e..0d5c29ae51de 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { struct intel_ntb_dev *ndev; + struct pci_dev *pdev; void __iomem *mmio; char *buf; size_t buf_size; ssize_t ret, off; - union { u64 v64; u32 v32; u16 v16; } u; + union { u64 v64; u32 v32; u16 v16; u8 v8; } u; ndev = filp->private_data; + pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; buf_size = min(count, 0x800ul); @@ -632,6 +634,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Doorbell Bell -\t\t%#llx\n", u.v64); off += scnprintf(buf + off, buf_size - off, + "\nNTB Window Size:\n"); + + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR5SZ %hhu\n", u.v8); + } + + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR5SZ %hhu\n", u.v8); + } + 
+ off += scnprintf(buf + off, buf_size - off, "\nNTB Incoming XLAT:\n"); u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2)); @@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "LMT45 -\t\t\t%#018llx\n", u.v64); } - if (pdev_is_xeon(ndev->ntb.pdev)) { + if (pdev_is_xeon(pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { off += scnprintf(buf + off, buf_size - off, "\nNTB Outgoing B2B XLAT:\n"); @@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "\nXEON NTB Hardware Errors:\n"); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_DEVSTS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "DEVSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_LINK_STATUS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "LNKSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_UNCERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "UNCERRSTS -\t\t%#06x\n", u.v32); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_CORERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "CORERRSTS -\t\t%#06x\n", u.v32); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 2ef9d9130864..d5c5894f252e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -153,6 +153,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + unsigned int rx_alloc_entry; dma_cookie_t last_cookie; struct tasklet_struct rxc_db_work; @@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + 
out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n", qp->rx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, { struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_mw *mw; + struct ntb_dev *ndev = nt->ndev; + struct ntb_queue_entry *entry; unsigned int rx_size, num_qps_mw; unsigned int mw_num, mw_count, qp_count; unsigned int i; + int node; mw_count = nt->mw_count; qp_count = nt->qp_count; @@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_index = 0; + /* + * Checking to see if we have more entries than the default. + * We should add additional entries if that is the case so we + * can be in sync with the transport frames. 
+ */ + node = dev_to_node(&ndev->dev); + for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) { + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); + if (!entry) + return -ENOMEM; + + entry->qp = qp; + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, + &qp->rx_free_q); + qp->rx_alloc_entry++; + } + qp->remote_rx_info->entry = qp->rx_max_entry - 1; /* setup the hdr offsets with 0's */ @@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; + mw_count = ntb_mw_count(ndev); + if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) { + dev_err(&ndev->dev, "Not enough scratch pad registers for %s", + NTB_TRANSPORT_NAME); + return -EIO; + } + if (ntb_db_is_unsafe(ndev)) dev_dbg(&ndev->dev, "doorbell is unsafe, proceed anyway...\n"); @@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->ndev = ndev; - mw_count = ntb_mw_count(ndev); - nt->mw_count = mw_count; nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), @@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } + qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES; - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + for (i = 0; i < qp->tx_max_entry; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1744,6 +1773,7 @@ err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: + qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->tx_dma_chan) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8dfce9c9aad0..6a50f20bf1cd 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/ntb.h> +#include <linux/mutex.h> #define DRIVER_NAME "ntb_perf" 
#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); static struct dentry *perf_debugfs_dir; +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); @@ -117,6 +122,10 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; + int status; + u64 copied; + u64 diff_us; }; struct perf_ctx { @@ -124,23 +133,23 @@ struct perf_ctx { spinlock_t db_lock; struct perf_mw mw; bool link_is_up; - struct work_struct link_cleanup; struct delayed_work link_work; + wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { VERSION = 0, MW_SZ_HIGH, MW_SZ_LOW, - SPAD_MSG, - SPAD_ACK, MAX_SPAD }; @@ -148,10 +157,16 @@ static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) + if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); - else - schedule_work(&perf->link_cleanup); + } else { + dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + + if (!perf->link_is_up) + cancel_delayed_work_sync(&perf->link_work); + + perf->link_is_up = false; + } } static void perf_db_event(void *ctx, int vec) @@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, char __iomem *tmp = dst; u64 perf, diff_us; ktime_t kstart, kstop, kdiff; + unsigned long last_sleep = jiffies; chunks = div64_u64(win_size, buf_size); total_chunks = div64_u64(total, 
buf_size); @@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } else tmp += buf_size; - /* Probably should schedule every 4GB to prevent soft hang. */ - if (((copied % SZ_4G) == 0) && !use_dma) { + /* Probably should schedule every 5s to prevent soft hang. */ + if (unlikely((jiffies - last_sleep) > 5 * HZ)) { + last_sleep = jiffies; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { - pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + pr_debug("%s: All DMA descriptors submitted\n", current->comm); + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); kdiff = ktime_sub(kstop, kstart); diff_us = ktime_to_us(kdiff); - pr_info("%s: copied %llu bytes\n", current->comm, copied); + pr_debug("%s: copied %llu bytes\n", current->comm, copied); - pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); + pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); perf = div64_u64(copied, diff_us); - pr_info("%s: MBytes/s: %llu\n", current->comm, perf); + pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + + pctx->copied = copied; + pctx->diff_us = diff_us; return 0; } @@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data) int rc, node, i; struct dma_chan *dma_chan = NULL; - pr_info("kthread %s starting...\n", current->comm); + pr_debug("kthread %s starting...\n", current->comm); node = dev_to_node(&pdev->dev); @@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -402,6 +431,16 @@ err: pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + 
} + __set_current_state(TASK_RUNNING); + return rc; } @@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work) dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; + + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); @@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work) goto out1; perf->link_is_up = true; + wake_up(&perf->link_wq); return; @@ -508,18 +552,6 @@ out: msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); } -static void perf_link_cleanup(struct work_struct *work) -{ - struct perf_ctx *perf = container_of(work, - struct perf_ctx, - link_cleanup); - - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); - - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); -} - static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; @@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, { struct perf_ctx *perf = filp->private_data; char *buf; - ssize_t ret, out_offset; + ssize_t ret, out_off = 0; + struct pthr_ctx *pctx; + int i; + u64 rate; if (!perf) return 0; - buf = kmalloc(64, GFP_KERNEL); + buf = kmalloc(1024, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + + if (mutex_is_locked(&perf->run_mutex)) { + out_off = snprintf(buf, 64, "running\n"); + goto read_from_buf; + } + + for (i = 0; i < MAX_THREADS; i++) { + pctx = &perf->pthr_ctx[i]; + + if (pctx->status == -ENODATA) + break; + + if (pctx->status) { + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: error %d\n", i, + pctx->status); + continue; + } + + rate = div64_u64(pctx->copied, pctx->diff_us); + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: copied 
%llu bytes in %llu usecs, %llu MBytes/s\n", + i, pctx->copied, pctx->diff_us, rate); + } + +read_from_buf: + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); kfree(buf); return ret; @@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { - kthread_stop(pctx->thread); + pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } } +static void perf_clear_thread_status(struct perf_ctx *perf) +{ + int i; + + for (i = 0; i < MAX_THREADS; i++) + perf->pthr_ctx[i].status = -ENODATA; +} + static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); - if (!perf->link_is_up) - return 0; + if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; + perf_clear_thread_status(perf); - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); + } - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix 
run_order to %u\n", run_order); + } - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + node = dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } } + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) int node; int rc = 0; + if (ntb_spad_count(ntb) < MAX_SPAD) { + dev_err(&ntb->dev, "Not enough scratch pad registers for %s", + DRIVER_NAME); + return -EIO; + } + node = dev_to_node(&pdev->dev); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); @@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); + init_waitqueue_head(&perf->link_wq); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); - INIT_WORK(&perf->link_cleanup, 
perf_link_cleanup); rc = ntb_set_ctx(ntb, perf, &perf_ops); if (rc) @@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) if (rc) goto err_ctx; + perf_clear_thread_status(perf); + return 0; err_ctx: cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); kfree(perf); err_perf: return rc; @@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); ntb_clear_ctx(ntb); ntb_link_disable(ntb); diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index fe1600566981..7d311799fca1 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -61,6 +61,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/debugfs.h> #include <linux/ntb.h> @@ -96,8 +97,13 @@ struct pp_ctx { spinlock_t db_lock; struct timer_list db_timer; unsigned long db_delay; + struct dentry *debugfs_node_dir; + struct dentry *debugfs_count; + atomic_t count; }; +static struct dentry *pp_debugfs_dir; + static void pp_ping(unsigned long ctx) { struct pp_ctx *pp = (void *)ctx; @@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec) dev_dbg(&pp->ntb->dev, "Pong vec %d bits %#llx\n", vec, db_bits); + atomic_inc(&pp->count); } spin_unlock_irqrestore(&pp->db_lock, irqflags); } +static int pp_debugfs_setup(struct pp_ctx *pp) +{ + struct pci_dev *pdev = pp->ntb->pdev; + + if (!pp_debugfs_dir) + return -ENODEV; + + pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev), + pp_debugfs_dir); + if (!pp->debugfs_node_dir) + return -ENODEV; + + pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR, + pp->debugfs_node_dir, + &pp->count); + if (!pp->debugfs_count) + return -ENODEV; + + return 0; +} + static const struct ntb_ctx_ops pp_ops 
= { .link_event = pp_link_event, .db_event = pp_db_event, @@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client, pp->ntb = ntb; pp->db_bits = 0; + atomic_set(&pp->count, 0); spin_lock_init(&pp->db_lock); setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); pp->db_delay = msecs_to_jiffies(delay_ms); @@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client, if (rc) goto err_ctx; + rc = pp_debugfs_setup(pp); + if (rc) + goto err_ctx; + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); ntb_link_event(ntb); @@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client, { struct pp_ctx *pp = ntb->ctx; + debugfs_remove_recursive(pp->debugfs_node_dir); + ntb_clear_ctx(ntb); del_timer_sync(&pp->db_timer); ntb_link_disable(ntb); @@ -247,4 +282,29 @@ static struct ntb_client pp_client = { .remove = pp_remove, }, }; -module_ntb_client(pp_client); + +static int __init pp_init(void) +{ + int rc; + + if (debugfs_initialized()) + pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&pp_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(pp_debugfs_dir); + return rc; +} +module_init(pp_init); + +static void __exit pp_exit(void) +{ + ntb_unregister_client(&pp_client); + debugfs_remove_recursive(pp_debugfs_dir); +} +module_exit(pp_exit); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 6f5dc6ca673d..61bf2ef87e0e 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -59,6 +59,12 @@ * * Eg: check if clearing the doorbell mask generates an interrupt. * + * # Check the link status + * root@self# cat $DBG_DIR/link + * + * # Block until the link is up + * root@self# echo Y > $DBG_DIR/link_event + * * # Set the doorbell mask * root@self# echo 's 1' > $DBG_DIR/mask * @@ -79,6 +85,13 @@ * root@self# cat $DBG_DIR/spad * * Observe that spad 0 and 1 have the values set by the peer. 
+ * + * # Check the memory window translation info + * cat $DBG_DIR/peer_trans0 + * + * # Setup a 16k memory window buffer + * echo 16384 > $DBG_DIR/peer_trans0 + * */ #include <linux/init.h> @@ -89,6 +102,7 @@ #include <linux/dma-mapping.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/ntb.h> @@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +#define MAX_MWS 16 + static struct dentry *tool_dbgfs; +struct tool_mw { + int idx; + struct tool_ctx *tc; + resource_size_t win_size; + resource_size_t size; + u8 __iomem *local; + u8 *peer; + dma_addr_t peer_dma; + struct dentry *peer_dbg_file; +}; + struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; + wait_queue_head_t link_wq; + int mw_count; + struct tool_mw mws[MAX_MWS]; }; #define SPAD_FNAME_SIZE 0x10 @@ -135,6 +165,8 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? "up" : "down", speed, width); + + wake_up(&tc->link_wq); } static void tool_db_event(void *ctx, int vec) @@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, if (!spad_read_fn) return -EINVAL; - buf_size = min_t(size_t, size, 0x100); + spad_count = ntb_spad_count(tc->ntb); + + /* + * We multiply the number of spads by 15 to get the buffer size + * this is from 3 for the %d, 10 for the largest hex value + * (0x00000000) and 2 for the tab and line feed. 
+ */ + buf_size = min_t(size_t, size, spad_count * 15); buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) @@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, pos = 0; - spad_count = ntb_spad_count(tc->ntb); for (i = 0; i < spad_count; ++i) { pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", i, spad_read_fn(tc->ntb, i)); @@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, { int spad_idx; u32 spad_val; - char *buf; + char *buf, *buf_ptr; int pos, n; ssize_t rc; @@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, } buf[size] = 0; - - n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + buf_ptr = buf; + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); while (n == 2) { + buf_ptr += pos; rc = spad_write_fn(tc->ntb, spad_idx, spad_val); if (rc) break; - n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); } if (n < 0) @@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, tool_peer_spad_read, tool_peer_spad_write); +static ssize_t tool_link_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[3]; + + buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 
'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + + return simple_read_from_buffer(ubuf, size, offp, buf, 2); +} + +static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (val) + rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + else + rc = ntb_link_disable(tc->ntb); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_fops, + tool_link_read, + tool_link_write); + +static ssize_t tool_link_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (wait_event_interruptible(tc->link_wq, + ntb_link_is_up(tc->ntb, NULL, NULL) == val)) + return -ERESTART; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_event_fops, + NULL, + tool_link_event_write); + +static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (mw->local == NULL) + return -EIO; + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy_fromio(buf, mw->local + pos, size); + rc = copy_to_user(ubuf, buf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; 
+ rc = size; + +err_free: + kfree(buf); + + return rc; +} + +static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = copy_from_user(buf, ubuf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + + memcpy_toio(mw->local + pos, buf, size); + +err_free: + kfree(buf); + + return rc; +} + +static TOOL_FOPS_RDWR(tool_mw_fops, + tool_mw_read, + tool_mw_write); + +static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); +} + +static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); +} + +static TOOL_FOPS_RDWR(tool_peer_mw_fops, + tool_peer_mw_read, + tool_peer_mw_write); + +static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size) +{ + int rc; + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + resource_size_t size, align, align_size; + char buf[16]; + + if (mw->peer) + return 0; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, + &align_size); + if (rc) + return rc; + + mw->size = min_t(resource_size_t, req_size, size); + mw->size = round_up(mw->size, align); + mw->size = round_up(mw->size, align_size); + mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, + &mw->peer_dma, GFP_KERNEL); + + if (!mw->peer) + 
return -ENOMEM; + + rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); + if (rc) + goto err_free_dma; + + snprintf(buf, sizeof(buf), "peer_mw%d", idx); + mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR, + mw->tc->dbgfs, mw, + &tool_peer_mw_fops); + + return 0; + +err_free_dma: + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + mw->peer = NULL; + mw->peer_dma = 0; + mw->size = 0; + + return rc; +} + +static void tool_free_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + + if (mw->peer) { + ntb_mw_clear_trans(tc->ntb, idx); + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + } + + mw->peer = NULL; + mw->peer_dma = 0; + + debugfs_remove(mw->peer_dbg_file); + + mw->peer_dbg_file = NULL; +} + +static ssize_t tool_peer_mw_trans_read(struct file *filep, + char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char *buf; + size_t buf_size; + ssize_t ret, off = 0; + + phys_addr_t base; + resource_size_t mw_size; + resource_size_t align; + resource_size_t align_size; + + buf_size = min_t(size_t, size, 512); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ntb_mw_get_range(mw->tc->ntb, mw->idx, + &base, &mw_size, &align, &align_size); + + off += scnprintf(buf + off, buf_size - off, + "Peer MW %d Information:\n", mw->idx); + + off += scnprintf(buf + off, buf_size - off, + "Physical Address \t%pa[p]\n", + &base); + + off += scnprintf(buf + off, buf_size - off, + "Window Size \t%lld\n", + (unsigned long long)mw_size); + + off += scnprintf(buf + off, buf_size - off, + "Alignment \t%lld\n", + (unsigned long long)align); + + off += scnprintf(buf + off, buf_size - off, + "Size Alignment \t%lld\n", + (unsigned long long)align_size); + + off += scnprintf(buf + off, buf_size - off, + "Ready \t%c\n", + (mw->peer) ? 
'Y' : 'N'); + + off += scnprintf(buf + off, buf_size - off, + "Allocated Size \t%zd\n", + (mw->peer) ? (size_t)mw->size : 0); + + ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + kfree(buf); + return ret; +} + +static ssize_t tool_peer_mw_trans_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char buf[32]; + size_t buf_size; + unsigned long long val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + tool_free_mw(mw->tc, mw->idx); + if (val) + rc = tool_setup_mw(mw->tc, mw->idx, val); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops, + tool_peer_mw_trans_read, + tool_peer_mw_trans_write); + +static int tool_init_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + int rc; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size, + NULL, NULL); + if (rc) + return rc; + + mw->tc = tc; + mw->idx = idx; + mw->local = ioremap_wc(base, mw->win_size); + if (!mw->local) + return -EFAULT; + + return 0; +} + +static void tool_free_mws(struct tool_ctx *tc) +{ + int i; + + for (i = 0; i < tc->mw_count; i++) { + tool_free_mw(tc, i); + + if (tc->mws[i].local) + iounmap(tc->mws[i].local); + + tc->mws[i].local = NULL; + } +} + static void tool_setup_dbgfs(struct tool_ctx *tc) { + int i; + /* This modules is useless without dbgfs... 
*/ if (!tool_dbgfs) { tc->dbgfs = NULL; @@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); + + debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_link_fops); + + debugfs_create_file("link_event", S_IWUSR, tc->dbgfs, + tc, &tool_link_event_fops); + + for (i = 0; i < tc->mw_count; i++) { + char buf[30]; + + snprintf(buf, sizeof(buf), "mw%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_mw_fops); + + snprintf(buf, sizeof(buf), "peer_trans%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_peer_mw_trans_fops); + } } static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc; int rc; + int i; if (ntb_db_is_unsafe(ntb)) dev_dbg(&ntb->dev, "doorbell is unsafe\n"); @@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) if (ntb_spad_is_unsafe(ntb)) dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - tc = kmalloc(sizeof(*tc), GFP_KERNEL); + tc = kzalloc(sizeof(*tc), GFP_KERNEL); if (!tc) { rc = -ENOMEM; goto err_tc; } tc->ntb = ntb; + init_waitqueue_head(&tc->link_wq); + + tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + for (i = 0; i < tc->mw_count; i++) { + rc = tool_init_mw(tc, i); + if (rc) + goto err_ctx; + } tool_setup_dbgfs(tc); @@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) return 0; err_ctx: + tool_free_mws(tc); debugfs_remove_recursive(tc->dbgfs); kfree(tc); err_tc: @@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; + tool_free_mws(tc); + ntb_clear_ctx(ntb); ntb_link_disable(ntb); |