diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2017-03-31 13:01:46 +0200 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2017-03-31 13:01:46 +0200 |
commit | 6fb81d69d0bb455e6dbb5bd99529e2dea74b93e7 (patch) | |
tree | 50ec4711bfb152cf463616b5789f42858409c5ad /drivers/s390 | |
parent | 485527ba578254bb1171b13c55394257fd63cd59 (diff) | |
parent | 1877888d0ad21858693d8a5594734b125a068638 (diff) | |
download | lwn-6fb81d69d0bb455e6dbb5bd99529e2dea74b93e7.tar.gz lwn-6fb81d69d0bb455e6dbb5bd99529e2dea74b93e7.zip |
Merge branch 'vfio-ccw-for-martin' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw into features
Pull vfio-ccw branch to add the basic channel I/O passthrough
intrastructure based on vfio.
The focus is on supporting dasd-eckd(cu_type/dev_type = 0x3990/0x3390)
as the target device.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'drivers/s390')
-rw-r--r-- | drivers/s390/cio/Makefile | 3 | ||||
-rw-r--r-- | drivers/s390/cio/cio.c | 69 | ||||
-rw-r--r-- | drivers/s390/cio/cio.h | 1 | ||||
-rw-r--r-- | drivers/s390/cio/device_fsm.c | 54 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_cp.c | 842 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_cp.h | 42 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_drv.c | 308 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_fsm.c | 207 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_ops.c | 447 | ||||
-rw-r--r-- | drivers/s390/cio/vfio_ccw_private.h | 96 |
10 files changed, 2023 insertions, 46 deletions
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index 3ab9aedeb84a..bdf47526038a 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o obj-$(CONFIG_QDIO) += qdio.o + +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o +obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 1b350665c823..89216174fcbb 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */ return ccode; } } +EXPORT_SYMBOL_GPL(cio_start_key); int cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm) { return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY); } +EXPORT_SYMBOL_GPL(cio_start); /* * resume suspended I/O operation @@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch) return -ENODEV; } } +EXPORT_SYMBOL_GPL(cio_resume); /* * halt I/O operation @@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch) return -ENODEV; } } +EXPORT_SYMBOL_GPL(cio_halt); /* * Clear I/O operation @@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch) return -ENODEV; } } +EXPORT_SYMBOL_GPL(cio_clear); /* * Function: cio_cancel @@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch) return -ENODEV; } } +EXPORT_SYMBOL_GPL(cio_cancel); +/** + * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt + * and clear ordinally if subchannel is valid. + * @sch: subchannel on which to perform the cancel_halt_clear operation + * @iretry: the number of the times remained to retry the next operation + * + * This should be called repeatedly since halt/clear are asynchronous + * operations. We do one try with cio_cancel, three tries with cio_halt, + * 255 tries with cio_clear. The caller should initialize @iretry with + * the value 255 for its first call to this, and keep using the same + * @iretry in the subsequent calls until it gets a non -EBUSY return. + * + * Returns 0 if device now idle, -ENODEV for device not operational, + * -EBUSY if an interrupt is expected (either from halt/clear or from a + * status pending), and -EIO if out of retries. + */ +int cio_cancel_halt_clear(struct subchannel *sch, int *iretry) +{ + int ret; + + if (cio_update_schib(sch)) + return -ENODEV; + if (!sch->schib.pmcw.ena) + /* Not operational -> done. */ + return 0; + /* Stage 1: cancel io. */ + if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) && + !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) { + if (!scsw_is_tm(&sch->schib.scsw)) { + ret = cio_cancel(sch); + if (ret != -EINVAL) + return ret; + } + /* + * Cancel io unsuccessful or not applicable (transport mode). + * Continue with asynchronous instructions. + */ + *iretry = 3; /* 3 halt retries. */ + } + /* Stage 2: halt io. */ + if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) { + if (*iretry) { + *iretry -= 1; + ret = cio_halt(sch); + if (ret != -EBUSY) + return (ret == 0) ? -EBUSY : ret; + } + /* Halt io unsuccessful. */ + *iretry = 255; /* 255 clear retries. */ + } + /* Stage 3: clear io. */ + if (*iretry) { + *iretry -= 1; + ret = cio_clear(sch); + return (ret == 0) ? -EBUSY : ret; + } + /* Function was unsuccessful */ + return -EIO; +} +EXPORT_SYMBOL_GPL(cio_cancel_halt_clear); static void cio_apply_config(struct subchannel *sch, struct schib *schib) { @@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch) } return ret; } +EXPORT_SYMBOL_GPL(cio_commit_config); /** * cio_update_schib - Perform stsch and update schib if subchannel is valid. @@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key) return cio_start_handle_notoper(sch, lpm); } } +EXPORT_SYMBOL_GPL(cio_tm_start_key); /** * cio_tm_intrg - perform interrogate function @@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch) return -ENODEV; } } +EXPORT_SYMBOL_GPL(cio_tm_intrg); diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index f0e57aefb5f2..939596d81b73 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32); extern int cio_disable_subchannel (struct subchannel *); extern int cio_cancel (struct subchannel *); extern int cio_clear (struct subchannel *); +extern int cio_cancel_halt_clear(struct subchannel *, int *); extern int cio_resume (struct subchannel *); extern int cio_halt (struct subchannel *); extern int cio_start (struct subchannel *, struct ccw1 *, __u8); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 9afb5ce13007..12016e32e519 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires) add_timer(&cdev->private->timer); } -/* - * Cancel running i/o. This is called repeatedly since halt/clear are - * asynchronous operations. We do one try with cio_cancel, two tries - * with cio_halt, 255 tries with cio_clear. If everythings fails panic. - * Returns 0 if device now idle, -ENODEV for device not operational and - * -EBUSY if an interrupt is expected (either from halt/clear or from a - * status pending). - */ int ccw_device_cancel_halt_clear(struct ccw_device *cdev) { @@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev) int ret; sch = to_subchannel(cdev->dev.parent); - if (cio_update_schib(sch)) - return -ENODEV; - if (!sch->schib.pmcw.ena) - /* Not operational -> done. */ - return 0; - /* Stage 1: cancel io. */ - if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) && - !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) { - if (!scsw_is_tm(&sch->schib.scsw)) { - ret = cio_cancel(sch); - if (ret != -EINVAL) - return ret; - } - /* cancel io unsuccessful or not applicable (transport mode). - * Continue with asynchronous instructions. */ - cdev->private->iretry = 3; /* 3 halt retries. */ - } - if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) { - /* Stage 2: halt io. */ - if (cdev->private->iretry) { - cdev->private->iretry--; - ret = cio_halt(sch); - if (ret != -EBUSY) - return (ret == 0) ? -EBUSY : ret; - } - /* halt io unsuccessful. */ - cdev->private->iretry = 255; /* 255 clear retries. */ - } - /* Stage 3: clear io. */ - if (cdev->private->iretry) { - cdev->private->iretry--; - ret = cio_clear (sch); - return (ret == 0) ? -EBUSY : ret; - } - /* Function was unsuccessful */ - CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n", - cdev->private->dev_id.ssid, cdev->private->dev_id.devno); - return -EIO; + ret = cio_cancel_halt_clear(sch, &cdev->private->iretry); + + if (ret == -EIO) + CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n", + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno); + + return ret; } void ccw_device_update_sense_data(struct ccw_device *cdev) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c new file mode 100644 index 000000000000..ba6ac83a6c25 --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -0,0 +1,842 @@ +/* + * channel program interfaces + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/vfio.h> +#include <asm/idals.h> + +#include "vfio_ccw_cp.h" + +/* + * Max length for ccw chain. + * XXX: Limit to 256, need to check more? + */ +#define CCWCHAIN_LEN_MAX 256 + +struct pfn_array { + unsigned long pa_iova; + unsigned long *pa_iova_pfn; + unsigned long *pa_pfn; + int pa_nr; +}; + +struct pfn_array_table { + struct pfn_array *pat_pa; + int pat_nr; +}; + +struct ccwchain { + struct list_head next; + struct ccw1 *ch_ccw; + /* Guest physical address of the current chain. */ + u64 ch_iova; + /* Count of the valid ccws in chain. */ + int ch_len; + /* Pinned PAGEs for the original data. */ + struct pfn_array_table *ch_pat; +}; + +/* + * pfn_array_pin() - pin user pages in memory + * @pa: pfn_array on which to perform the operation + * @mdev: the mediated device to perform pin/unpin operations + * + * Attempt to pin user pages in memory. + * + * Usage of pfn_array: + * @pa->pa_iova starting guest physical I/O address. Assigned by caller. + * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated + * by caller. + * @pa->pa_pfn array that receives PFNs of the pages pinned. Allocated by + * caller. + * @pa->pa_nr number of pages from @pa->pa_iova to pin. Assigned by + * caller. + * number of pages pinned. Assigned by callee. + * + * Returns: + * Number of pages pinned on success. + * If @pa->pa_nr is 0 or negative, returns 0. + * If no pages were pinned, returns -errno. + */ +static int pfn_array_pin(struct pfn_array *pa, struct device *mdev) +{ + int i, ret; + + if (pa->pa_nr <= 0) { + pa->pa_nr = 0; + return 0; + } + + pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; + for (i = 1; i < pa->pa_nr; i++) + pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; + + ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, + IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); + + if (ret > 0 && ret != pa->pa_nr) { + vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret); + pa->pa_nr = 0; + return 0; + } + + return ret; +} + +/* Unpin the pages before releasing the memory. */ +static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) +{ + vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); + pa->pa_nr = 0; + kfree(pa->pa_iova_pfn); +} + +/* Alloc memory for PFNs, then pin pages with them. */ +static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, + u64 iova, unsigned int len) +{ + int ret = 0; + + if (!len || pa->pa_nr) + return -EINVAL; + + pa->pa_iova = iova; + + pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (!pa->pa_nr) + return -EINVAL; + + pa->pa_iova_pfn = kcalloc(pa->pa_nr, + sizeof(*pa->pa_iova_pfn) + + sizeof(*pa->pa_pfn), + GFP_KERNEL); + if (unlikely(!pa->pa_iova_pfn)) + return -ENOMEM; + pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; + + ret = pfn_array_pin(pa, mdev); + + if (ret > 0) + return ret; + else if (!ret) + ret = -EINVAL; + + kfree(pa->pa_iova_pfn); + + return ret; +} + +static int pfn_array_table_init(struct pfn_array_table *pat, int nr) +{ + pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL); + if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) { + pat->pat_nr = 0; + return -ENOMEM; + } + + pat->pat_nr = nr; + + return 0; +} + +static void pfn_array_table_unpin_free(struct pfn_array_table *pat, + struct device *mdev) +{ + int i; + + for (i = 0; i < pat->pat_nr; i++) + pfn_array_unpin_free(pat->pat_pa + i, mdev); + + if (pat->pat_nr) { + kfree(pat->pat_pa); + pat->pat_pa = NULL; + pat->pat_nr = 0; + } +} + +static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat, + unsigned long iova) +{ + struct pfn_array *pa = pat->pat_pa; + unsigned long iova_pfn = iova >> PAGE_SHIFT; + int i, j; + + for (i = 0; i < pat->pat_nr; i++, pa++) + for (j = 0; j < pa->pa_nr; j++) + if (pa->pa_iova_pfn[i] == iova_pfn) + return true; + + return false; +} +/* Create the list idal words for a pfn_array_table. */ +static inline void pfn_array_table_idal_create_words( + struct pfn_array_table *pat, + unsigned long *idaws) +{ + struct pfn_array *pa; + int i, j, k; + + /* + * Idal words (execept the first one) rely on the memory being 4k + * aligned. If a user virtual address is 4K aligned, then it's + * corresponding kernel physical address will also be 4K aligned. Thus + * there will be no problem here to simply use the phys to create an + * idaw. + */ + k = 0; + for (i = 0; i < pat->pat_nr; i++) { + pa = pat->pat_pa + i; + for (j = 0; j < pa->pa_nr; j++) { + idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT; + if (k == 0) + idaws[k] += pa->pa_iova & (PAGE_SIZE - 1); + k++; + } + } +} + + +/* + * Within the domain (@mdev), copy @n bytes from a guest physical + * address (@iova) to a host physical address (@to). + */ +static long copy_from_iova(struct device *mdev, + void *to, u64 iova, + unsigned long n) +{ + struct pfn_array pa = {0}; + u64 from; + int i, ret; + unsigned long l, m; + + ret = pfn_array_alloc_pin(&pa, mdev, iova, n); + if (ret <= 0) + return ret; + + l = n; + for (i = 0; i < pa.pa_nr; i++) { + from = pa.pa_pfn[i] << PAGE_SHIFT; + m = PAGE_SIZE; + if (i == 0) { + from += iova & (PAGE_SIZE - 1); + m -= iova & (PAGE_SIZE - 1); + } + + m = min(l, m); + memcpy(to + (n - l), (void *)from, m); + + l -= m; + if (l == 0) + break; + } + + pfn_array_unpin_free(&pa, mdev); + + return l; +} + +static long copy_ccw_from_iova(struct channel_program *cp, + struct ccw1 *to, u64 iova, + unsigned long len) +{ + struct ccw0 ccw0; + struct ccw1 *pccw1; + int ret; + int i; + + ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1)); + if (ret) + return ret; + + if (!cp->orb.cmd.fmt) { + pccw1 = to; + for (i = 0; i < len; i++) { + ccw0 = *(struct ccw0 *)pccw1; + if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) { + pccw1->cmd_code = CCW_CMD_TIC; + pccw1->flags = 0; + pccw1->count = 0; + } else { + pccw1->cmd_code = ccw0.cmd_code; + pccw1->flags = ccw0.flags; + pccw1->count = ccw0.count; + } + pccw1->cda = ccw0.cda; + pccw1++; + } + } + + return ret; +} + +/* + * Helpers to operate ccwchain. + */ +#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) + +#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) + +#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) + +#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA) + + +#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) + +static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) +{ + struct ccwchain *chain; + void *data; + size_t size; + + /* Make ccw address aligned to 8. */ + size = ((sizeof(*chain) + 7L) & -8L) + + sizeof(*chain->ch_ccw) * len + + sizeof(*chain->ch_pat) * len; + chain = kzalloc(size, GFP_DMA | GFP_KERNEL); + if (!chain) + return NULL; + + data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L); + chain->ch_ccw = (struct ccw1 *)data; + + data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len; + chain->ch_pat = (struct pfn_array_table *)data; + + chain->ch_len = len; + + list_add_tail(&chain->next, &cp->ccwchain_list); + + return chain; +} + +static void ccwchain_free(struct ccwchain *chain) +{ + list_del(&chain->next); + kfree(chain); +} + +/* Free resource for a ccw that allocated memory for its cda. */ +static void ccwchain_cda_free(struct ccwchain *chain, int idx) +{ + struct ccw1 *ccw = chain->ch_ccw + idx; + + if (!ccw->count) + return; + + kfree((void *)(u64)ccw->cda); +} + +/* Unpin the pages then free the memory resources. */ +static void cp_unpin_free(struct channel_program *cp) +{ + struct ccwchain *chain, *temp; + int i; + + list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { + for (i = 0; i < chain->ch_len; i++) { + pfn_array_table_unpin_free(chain->ch_pat + i, + cp->mdev); + ccwchain_cda_free(chain, i); + } + ccwchain_free(chain); + } +} + +/** + * ccwchain_calc_length - calculate the length of the ccw chain. + * @iova: guest physical address of the target ccw chain + * @cp: channel_program on which to perform the operation + * + * This is the chain length not considering any TICs. + * You need to do a new round for each TIC target. + * + * Returns: the length of the ccw chain or -errno. + */ +static int ccwchain_calc_length(u64 iova, struct channel_program *cp) +{ + struct ccw1 *ccw, *p; + int cnt; + + /* + * Copy current chain from guest to host kernel. + * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256). + * So copying 2K is enough (safe). + */ + p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL); + if (!ccw) + return -ENOMEM; + + cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX); + if (cnt) { + kfree(ccw); + return cnt; + } + + cnt = 0; + do { + cnt++; + + if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw))) + break; + + ccw++; + } while (cnt < CCWCHAIN_LEN_MAX + 1); + + if (cnt == CCWCHAIN_LEN_MAX + 1) + cnt = -EINVAL; + + kfree(p); + return cnt; +} + +static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp) +{ + struct ccwchain *chain; + u32 ccw_head, ccw_tail; + + list_for_each_entry(chain, &cp->ccwchain_list, next) { + ccw_head = chain->ch_iova; + ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1); + + if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail)) + return 1; + } + + return 0; +} + +static int ccwchain_loop_tic(struct ccwchain *chain, + struct channel_program *cp); + +static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp) +{ + struct ccwchain *chain; + int len, ret; + + /* May transfer to an existing chain. */ + if (tic_target_chain_exists(tic, cp)) + return 0; + + /* Get chain length. */ + len = ccwchain_calc_length(tic->cda, cp); + if (len < 0) + return len; + + /* Need alloc a new chain for this one. */ + chain = ccwchain_alloc(cp, len); + if (!chain) + return -ENOMEM; + chain->ch_iova = tic->cda; + + /* Copy the new chain from user. */ + ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len); + if (ret) { + ccwchain_free(chain); + return ret; + } + + /* Loop for tics on this new chain. */ + return ccwchain_loop_tic(chain, cp); +} + +/* Loop for TICs. */ +static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp) +{ + struct ccw1 *tic; + int i, ret; + + for (i = 0; i < chain->ch_len; i++) { + tic = chain->ch_ccw + i; + + if (!ccw_is_tic(tic)) + continue; + + ret = ccwchain_handle_tic(tic, cp); + if (ret) + return ret; + } + + return 0; +} + +static int ccwchain_fetch_tic(struct ccwchain *chain, + int idx, + struct channel_program *cp) +{ + struct ccw1 *ccw = chain->ch_ccw + idx; + struct ccwchain *iter; + u32 ccw_head, ccw_tail; + + list_for_each_entry(iter, &cp->ccwchain_list, next) { + ccw_head = iter->ch_iova; + ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1); + + if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) { + ccw->cda = (__u32) (addr_t) (iter->ch_ccw + + (ccw->cda - ccw_head)); + return 0; + } + } + + return -EFAULT; +} + +static int ccwchain_fetch_direct(struct ccwchain *chain, + int idx, + struct channel_program *cp) +{ + struct ccw1 *ccw; + struct pfn_array_table *pat; + unsigned long *idaws; + int idaw_nr; + + ccw = chain->ch_ccw + idx; + + /* + * Pin data page(s) in memory. + * The number of pages actually is the count of the idaws which will be + * needed when translating a direct ccw to a idal ccw. + */ + pat = chain->ch_pat + idx; + if (pfn_array_table_init(pat, 1)) + return -ENOMEM; + idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, + ccw->cda, ccw->count); + if (idaw_nr < 0) + return idaw_nr; + + /* Translate this direct ccw to a idal ccw. */ + idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL); + if (!idaws) { + pfn_array_table_unpin_free(pat, cp->mdev); + return -ENOMEM; + } + ccw->cda = (__u32) virt_to_phys(idaws); + ccw->flags |= CCW_FLAG_IDA; + + pfn_array_table_idal_create_words(pat, idaws); + + return 0; +} + +static int ccwchain_fetch_idal(struct ccwchain *chain, + int idx, + struct channel_program *cp) +{ + struct ccw1 *ccw; + struct pfn_array_table *pat; + unsigned long *idaws; + u64 idaw_iova; + unsigned int idaw_nr, idaw_len; + int i, ret; + + ccw = chain->ch_ccw + idx; + + /* Calculate size of idaws. */ + ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova)); + if (ret) + return ret; + idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count); + idaw_len = idaw_nr * sizeof(*idaws); + + /* Pin data page(s) in memory. */ + pat = chain->ch_pat + idx; + ret = pfn_array_table_init(pat, idaw_nr); + if (ret) + return ret; + + /* Translate idal ccw to use new allocated idaws. */ + idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL); + if (!idaws) { + ret = -ENOMEM; + goto out_unpin; + } + + ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len); + if (ret) + goto out_free_idaws; + + ccw->cda = virt_to_phys(idaws); + + for (i = 0; i < idaw_nr; i++) { + idaw_iova = *(idaws + i); + if (IS_ERR_VALUE(idaw_iova)) { + ret = -EFAULT; + goto out_free_idaws; + } + + ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev, + idaw_iova, 1); + if (ret < 0) + goto out_free_idaws; + } + + pfn_array_table_idal_create_words(pat, idaws); + + return 0; + +out_free_idaws: + kfree(idaws); +out_unpin: + pfn_array_table_unpin_free(pat, cp->mdev); + return ret; +} + +/* + * Fetch one ccw. + * To reduce memory copy, we'll pin the cda page in memory, + * and to get rid of the cda 2G limitiaion of ccw1, we'll translate + * direct ccws to idal ccws. + */ +static int ccwchain_fetch_one(struct ccwchain *chain, + int idx, + struct channel_program *cp) +{ + struct ccw1 *ccw = chain->ch_ccw + idx; + + if (ccw_is_test(ccw) || ccw_is_noop(ccw)) + return 0; + + if (ccw_is_tic(ccw)) + return ccwchain_fetch_tic(chain, idx, cp); + + if (ccw_is_idal(ccw)) + return ccwchain_fetch_idal(chain, idx, cp); + + return ccwchain_fetch_direct(chain, idx, cp); +} + +/** + * cp_init() - allocate ccwchains for a channel program. + * @cp: channel_program on which to perform the operation + * @mdev: the mediated device to perform pin/unpin operations + * @orb: control block for the channel program from the guest + * + * This creates one or more ccwchain(s), and copies the raw data of + * the target channel program from @orb->cmd.iova to the new ccwchain(s). + * + * Limitations: + * 1. Supports only prefetch enabled mode. + * 2. Supports idal(c64) ccw chaining. + * 3. Supports 4k idaw. + * + * Returns: + * %0 on success and a negative error value on failure. + */ +int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) +{ + u64 iova = orb->cmd.cpa; + struct ccwchain *chain; + int len, ret; + + /* + * XXX: + * Only support prefetch enable mode now. + * Only support 64bit addressing idal. + * Only support 4k IDAW. + */ + if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k) + return -EOPNOTSUPP; + + INIT_LIST_HEAD(&cp->ccwchain_list); + memcpy(&cp->orb, orb, sizeof(*orb)); + cp->mdev = mdev; + + /* Get chain length. */ + len = ccwchain_calc_length(iova, cp); + if (len < 0) + return len; + + /* Alloc mem for the head chain. */ + chain = ccwchain_alloc(cp, len); + if (!chain) + return -ENOMEM; + chain->ch_iova = iova; + + /* Copy the head chain from guest. */ + ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len); + if (ret) { + ccwchain_free(chain); + return ret; + } + + /* Now loop for its TICs. */ + ret = ccwchain_loop_tic(chain, cp); + if (ret) + cp_unpin_free(cp); + + return ret; +} + + +/** + * cp_free() - free resources for channel program. + * @cp: channel_program on which to perform the operation + * + * This unpins the memory pages and frees the memory space occupied by + * @cp, which must have been returned by a previous call to cp_init(). + * Otherwise, undefined behavior occurs. + */ +void cp_free(struct channel_program *cp) +{ + cp_unpin_free(cp); +} + +/** + * cp_prefetch() - translate a guest physical address channel program to + * a real-device runnable channel program. + * @cp: channel_program on which to perform the operation + * + * This function translates the guest-physical-address channel program + * and stores the result to ccwchain list. @cp must have been + * initialized by a previous call with cp_init(). Otherwise, undefined + * behavior occurs. + * + * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced + * as helpers to do ccw chain translation inside the kernel. Basically + * they accept a channel program issued by a virtual machine, and + * translate the channel program to a real-device runnable channel + * program. + * + * These APIs will copy the ccws into kernel-space buffers, and update + * the guest phsical addresses with their corresponding host physical + * addresses. Then channel I/O device drivers could issue the + * translated channel program to real devices to perform an I/O + * operation. + * + * These interfaces are designed to support translation only for + * channel programs, which are generated and formatted by a + * guest. Thus this will make it possible for things like VFIO to + * leverage the interfaces to passthrough a channel I/O mediated + * device in QEMU. + * + * We support direct ccw chaining by translating them to idal ccws. + * + * Returns: + * %0 on success and a negative error value on failure. + */ +int cp_prefetch(struct channel_program *cp) +{ + struct ccwchain *chain; + int len, idx, ret; + + list_for_each_entry(chain, &cp->ccwchain_list, next) { + len = chain->ch_len; + for (idx = 0; idx < len; idx++) { + ret = ccwchain_fetch_one(chain, idx, cp); + if (ret) + return ret; + } + } + + return 0; +} + +/** + * cp_get_orb() - get the orb of the channel program + * @cp: channel_program on which to perform the operation + * @intparm: new intparm for the returned orb + * @lpm: candidate value of the logical-path mask for the returned orb + * + * This function returns the address of the updated orb of the channel + * program. Channel I/O device drivers could use this orb to issue a + * ssch. + */ +union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm) +{ + union orb *orb; + struct ccwchain *chain; + struct ccw1 *cpa; + + orb = &cp->orb; + + orb->cmd.intparm = intparm; + orb->cmd.fmt = 1; + orb->cmd.key = PAGE_DEFAULT_KEY >> 4; + + if (orb->cmd.lpm == 0) + orb->cmd.lpm = lpm; + + chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next); + cpa = chain->ch_ccw; + orb->cmd.cpa = (__u32) __pa(cpa); + + return orb; +} + +/** + * cp_update_scsw() - update scsw for a channel program. + * @cp: channel_program on which to perform the operation + * @scsw: I/O results of the channel program and also the target to be + * updated + * + * @scsw contains the I/O results of the channel program that pointed + * to by @cp. However what @scsw->cpa stores is a host physical + * address, which is meaningless for the guest, which is waiting for + * the I/O results. + * + * This function updates @scsw->cpa to its coressponding guest physical + * address. + */ +void cp_update_scsw(struct channel_program *cp, union scsw *scsw) +{ + struct ccwchain *chain; + u32 cpa = scsw->cmd.cpa; + u32 ccw_head, ccw_tail; + + /* + * LATER: + * For now, only update the cmd.cpa part. We may need to deal with + * other portions of the schib as well, even if we don't return them + * in the ioctl directly. Path status changes etc. + */ + list_for_each_entry(chain, &cp->ccwchain_list, next) { + ccw_head = (u32)(u64)chain->ch_ccw; + ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1); + + if ((ccw_head <= cpa) && (cpa <= ccw_tail)) { + /* + * (cpa - ccw_head) is the offset value of the host + * physical ccw to its chain head. + * Adding this value to the guest physical ccw chain + * head gets us the guest cpa. + */ + cpa = chain->ch_iova + (cpa - ccw_head); + break; + } + } + + scsw->cmd.cpa = cpa; +} + +/** + * cp_iova_pinned() - check if an iova is pinned for a ccw chain. + * @cmd: ccwchain command on which to perform the operation + * @iova: the iova to check + * + * If the @iova is currently pinned for the ccw chain, return true; + * else return false. + */ +bool cp_iova_pinned(struct channel_program *cp, u64 iova) +{ + struct ccwchain *chain; + int i; + + list_for_each_entry(chain, &cp->ccwchain_list, next) { + for (i = 0; i < chain->ch_len; i++) + if (pfn_array_table_iova_pinned(chain->ch_pat + i, + iova)) + return true; + } + + return false; +} diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h new file mode 100644 index 000000000000..7a1996b3b36d --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_cp.h @@ -0,0 +1,42 @@ +/* + * channel program interfaces + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + */ + +#ifndef _VFIO_CCW_CP_H_ +#define _VFIO_CCW_CP_H_ + +#include <asm/cio.h> +#include <asm/scsw.h> + +#include "orb.h" + +/** + * struct channel_program - manage information for channel program + * @ccwchain_list: list head of ccwchains + * @orb: orb for the currently processed ssch request + * @mdev: the mediated device to perform page pinning/unpinning + * + * @ccwchain_list is the head of a ccwchain list, that contents the + * translated result of the guest channel program that pointed out by + * the iova parameter when calling cp_init. + */ +struct channel_program { + struct list_head ccwchain_list; + union orb orb; + struct device *mdev; +}; + +extern int cp_init(struct channel_program *cp, struct device *mdev, + union orb *orb); +extern void cp_free(struct channel_program *cp); +extern int cp_prefetch(struct channel_program *cp); +extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); +extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw); +extern bool cp_iova_pinned(struct channel_program *cp, u64 iova); + +#endif diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c new file mode 100644 index 000000000000..e90dd43d2a55 --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -0,0 +1,308 @@ +/* + * VFIO based Physical Subchannel device driver + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/uuid.h> +#include <linux/mdev.h> + +#include <asm/isc.h> + +#include "ioasm.h" +#include "css.h" +#include "vfio_ccw_private.h" + +struct workqueue_struct *vfio_ccw_work_q; + +/* + * Helpers + */ +int vfio_ccw_sch_quiesce(struct subchannel *sch) +{ + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + DECLARE_COMPLETION_ONSTACK(completion); + int iretry, ret = 0; + + spin_lock_irq(sch->lock); + if (!sch->schib.pmcw.ena) + goto out_unlock; + ret = cio_disable_subchannel(sch); + if (ret != -EBUSY) + goto out_unlock; + + do { + iretry = 255; + + ret = cio_cancel_halt_clear(sch, &iretry); + while (ret == -EBUSY) { + /* + * Flush all I/O and wait for + * cancel/halt/clear completion. + */ + private->completion = &completion; + spin_unlock_irq(sch->lock); + + wait_for_completion_timeout(&completion, 3*HZ); + + spin_lock_irq(sch->lock); + private->completion = NULL; + flush_workqueue(vfio_ccw_work_q); + ret = cio_cancel_halt_clear(sch, &iretry); + }; + + ret = cio_disable_subchannel(sch); + } while (ret == -EBUSY); +out_unlock: + private->state = VFIO_CCW_STATE_NOT_OPER; + spin_unlock_irq(sch->lock); + return ret; +} + +static void vfio_ccw_sch_io_todo(struct work_struct *work) +{ + struct vfio_ccw_private *private; + struct subchannel *sch; + struct irb *irb; + + private = container_of(work, struct vfio_ccw_private, io_work); + irb = &private->irb; + sch = private->sch; + + if (scsw_is_solicited(&irb->scsw)) { + cp_update_scsw(&private->cp, &irb->scsw); + cp_free(&private->cp); + } + memcpy(private->io_region.irb_area, irb, sizeof(*irb)); + + if (private->io_trigger) + eventfd_signal(private->io_trigger, 1); + + if (private->mdev) + private->state = VFIO_CCW_STATE_IDLE; +} + +/* + * Sysfs interfaces + */ +static ssize_t chpids_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct chsc_ssd_info *ssd = &sch->ssd_info; + ssize_t ret = 0; + int chp; + int mask; + + for (chp = 0; chp < 8; chp++) { + mask = 0x80 >> chp; + if (ssd->path_mask & mask) + ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); + else + ret += sprintf(buf + ret, "00 "); + } + ret += sprintf(buf+ret, "\n"); + return ret; +} + +static ssize_t pimpampom_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct pmcw *pmcw = &sch->schib.pmcw; + + return sprintf(buf, "%02x %02x %02x\n", + pmcw->pim, pmcw->pam, pmcw->pom); +} + +static DEVICE_ATTR(chpids, 0444, chpids_show, NULL); +static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL); + +static struct attribute *vfio_subchannel_attrs[] = { + &dev_attr_chpids.attr, + &dev_attr_pimpampom.attr, + NULL, +}; + +static struct attribute_group vfio_subchannel_attr_group = { + .attrs = vfio_subchannel_attrs, +}; + +/* + * Css driver callbacks + */ +static void vfio_ccw_sch_irq(struct subchannel *sch) +{ + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + + inc_irq_stat(IRQIO_CIO); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); +} + +static int vfio_ccw_sch_probe(struct subchannel *sch) +{ + struct pmcw *pmcw = &sch->schib.pmcw; + struct vfio_ccw_private *private; + int ret; + + if (pmcw->qf) { + dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n", + dev_name(&sch->dev)); + return -ENODEV; + } + + private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); + if (!private) + return -ENOMEM; + private->sch = sch; + dev_set_drvdata(&sch->dev, private); + + spin_lock_irq(sch->lock); + private->state = VFIO_CCW_STATE_NOT_OPER; + sch->isc = VFIO_CCW_ISC; + ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); + spin_unlock_irq(sch->lock); + if (ret) + goto out_free; + + ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group); + if (ret) + goto out_disable; + + ret = vfio_ccw_mdev_reg(sch); + if (ret) + goto out_rm_group; + + INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); + atomic_set(&private->avail, 1); + private->state = VFIO_CCW_STATE_STANDBY; + + return 0; + +out_rm_group: + sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group); +out_disable: + cio_disable_subchannel(sch); +out_free: + dev_set_drvdata(&sch->dev, NULL); + kfree(private); + return ret; +} + +static int vfio_ccw_sch_remove(struct subchannel *sch) +{ + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + + vfio_ccw_sch_quiesce(sch); + + vfio_ccw_mdev_unreg(sch); + + sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group); + + dev_set_drvdata(&sch->dev, NULL); + + kfree(private); + + return 0; +} + +static void vfio_ccw_sch_shutdown(struct subchannel *sch) +{ + vfio_ccw_sch_quiesce(sch); +} + +/** + * vfio_ccw_sch_event - process subchannel event + * @sch: subchannel + * @process: non-zero if function is called in process context + * + * An unspecified event occurred for this subchannel. Adjust data according + * to the current operational state of the subchannel. Return zero when the + * event has been handled sufficiently or -EAGAIN when this function should + * be called again in process context. + */ +static int vfio_ccw_sch_event(struct subchannel *sch, int process) +{ + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + unsigned long flags; + + spin_lock_irqsave(sch->lock, flags); + if (!device_is_registered(&sch->dev)) + goto out_unlock; + + if (work_pending(&sch->todo_work)) + goto out_unlock; + + if (cio_update_schib(sch)) { + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + goto out_unlock; + } + + private = dev_get_drvdata(&sch->dev); + if (private->state == VFIO_CCW_STATE_NOT_OPER) { + private->state = private->mdev ? VFIO_CCW_STATE_IDLE : + VFIO_CCW_STATE_STANDBY; + } + +out_unlock: + spin_unlock_irqrestore(sch->lock, flags); + + return 0; +} + +static struct css_device_id vfio_ccw_sch_ids[] = { + { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, }, + { /* end of list */ }, +}; +MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids); + +static struct css_driver vfio_ccw_sch_driver = { + .drv = { + .name = "vfio_ccw", + .owner = THIS_MODULE, + }, + .subchannel_type = vfio_ccw_sch_ids, + .irq = vfio_ccw_sch_irq, + .probe = vfio_ccw_sch_probe, + .remove = vfio_ccw_sch_remove, + .shutdown = vfio_ccw_sch_shutdown, + .sch_event = vfio_ccw_sch_event, +}; + +static int __init vfio_ccw_sch_init(void) +{ + int ret; + + vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw"); + if (!vfio_ccw_work_q) + return -ENOMEM; + + isc_register(VFIO_CCW_ISC); + ret = css_driver_register(&vfio_ccw_sch_driver); + if (ret) { + isc_unregister(VFIO_CCW_ISC); + destroy_workqueue(vfio_ccw_work_q); + } + + return ret; +} + +static void __exit vfio_ccw_sch_exit(void) +{ + css_driver_unregister(&vfio_ccw_sch_driver); + isc_unregister(VFIO_CCW_ISC); + destroy_workqueue(vfio_ccw_work_q); +} +module_init(vfio_ccw_sch_init); +module_exit(vfio_ccw_sch_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c new file mode 100644 index 000000000000..55b6cc55758e --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -0,0 +1,207 @@ +/* + * Finite state machine for vfio-ccw device handling + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + */ + +#include <linux/vfio.h> +#include <linux/mdev.h> + +#include "ioasm.h" +#include "vfio_ccw_private.h" + +static int fsm_io_helper(struct vfio_ccw_private *private) +{ + struct subchannel *sch; + union orb *orb; + int ccode; + __u8 lpm; + unsigned long flags; + + sch = private->sch; + + spin_lock_irqsave(sch->lock, flags); + private->state = VFIO_CCW_STATE_BUSY; + spin_unlock_irqrestore(sch->lock, flags); + + orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); + + /* Issue "Start Subchannel" */ + ccode = ssch(sch->schid, orb); + + switch (ccode) { + case 0: + /* + * Initialize device status information + */ + sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND; + return 0; + case 1: /* Status pending */ + case 2: /* Busy */ + return -EBUSY; + case 3: /* Device/path not operational */ + { + lpm = orb->cmd.lpm; + if (lpm != 0) + sch->lpm &= ~lpm; + else + sch->lpm = 0; + + if (cio_update_schib(sch)) + return -ENODEV; + + return sch->lpm ? -EACCES : -ENODEV; + } + default: + return ccode; + } +} + +static void fsm_notoper(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct subchannel *sch = private->sch; + + /* + * TODO: + * Probably we should send the machine check to the guest. + */ + css_sched_sch_todo(sch, SCH_TODO_UNREG); + private->state = VFIO_CCW_STATE_NOT_OPER; +} + +/* + * No operation action. + */ +static void fsm_nop(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ +} + +static void fsm_io_error(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state); + private->io_region.ret_code = -EIO; +} + +static void fsm_io_busy(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + private->io_region.ret_code = -EBUSY; +} + +static void fsm_disabled_irq(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct subchannel *sch = private->sch; + + /* + * An interrupt in a disabled state means a previous disable was not + * successful - should not happen, but we try to disable again. + */ + cio_disable_subchannel(sch); +} + +/* + * Deal with the ccw command request from the userspace. + */ +static void fsm_io_request(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + union orb *orb; + union scsw *scsw = &private->scsw; + struct ccw_io_region *io_region = &private->io_region; + struct mdev_device *mdev = private->mdev; + + private->state = VFIO_CCW_STATE_BOXED; + + memcpy(scsw, io_region->scsw_area, sizeof(*scsw)); + + if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) { + orb = (union orb *)io_region->orb_area; + + io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), + orb); + if (io_region->ret_code) + goto err_out; + + io_region->ret_code = cp_prefetch(&private->cp); + if (io_region->ret_code) { + cp_free(&private->cp); + goto err_out; + } + + /* Start channel program and wait for I/O interrupt. */ + io_region->ret_code = fsm_io_helper(private); + if (io_region->ret_code) { + cp_free(&private->cp); + goto err_out; + } + return; + } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) { + /* XXX: Handle halt. */ + io_region->ret_code = -EOPNOTSUPP; + goto err_out; + } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { + /* XXX: Handle clear. */ + io_region->ret_code = -EOPNOTSUPP; + goto err_out; + } + +err_out: + private->state = VFIO_CCW_STATE_IDLE; +} + +/* + * Got an interrupt for a normal io (state busy). + */ +static void fsm_irq(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct irb *irb; + + if (!private) + return; + + irb = this_cpu_ptr(&cio_irb); + memcpy(&private->irb, irb, sizeof(*irb)); + + queue_work(vfio_ccw_work_q, &private->io_work); + + if (private->completion) + complete(private->completion); +} + +/* + * Device statemachine + */ +fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { + [VFIO_CCW_STATE_NOT_OPER] = { + [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop, + [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, + }, + [VFIO_CCW_STATE_STANDBY] = { + [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, + [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + }, + [VFIO_CCW_STATE_IDLE] = { + [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, + [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + }, + [VFIO_CCW_STATE_BOXED] = { + [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, + [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + }, + [VFIO_CCW_STATE_BUSY] = { + [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, + [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + }, +}; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c new file mode 100644 index 000000000000..b2e615404034 --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -0,0 +1,447 @@ +/* + * Physical device callbacks for vfio_ccw + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + */ + +#include <linux/vfio.h> +#include <linux/mdev.h> + +#include "vfio_ccw_private.h" + +static int vfio_ccw_mdev_reset(struct mdev_device *mdev) +{ + struct vfio_ccw_private *private; + struct subchannel *sch; + int ret; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + sch = private->sch; + /* + * TODO: + * In the cureent stage, some things like "no I/O running" and "no + * interrupt pending" are clear, but we are not sure what other state + * we need to care about. + * There are still a lot more instructions need to be handled. We + * should come back here later. + */ + ret = vfio_ccw_sch_quiesce(sch); + if (ret) + return ret; + + ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); + if (!ret) + private->state = VFIO_CCW_STATE_IDLE; + + return ret; +} + +static int vfio_ccw_mdev_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct vfio_ccw_private *private = + container_of(nb, struct vfio_ccw_private, nb); + + if (!private) + return NOTIFY_STOP; + + /* + * Vendor drivers MUST unpin pages in response to an + * invalidation. + */ + if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { + struct vfio_iommu_type1_dma_unmap *unmap = data; + + if (!cp_iova_pinned(&private->cp, unmap->iova)) + return NOTIFY_OK; + + if (vfio_ccw_mdev_reset(private->mdev)) + return NOTIFY_BAD; + + cp_free(&private->cp); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) +{ + return sprintf(buf, "I/O subchannel (Non-QDIO)\n"); +} +MDEV_TYPE_ATTR_RO(name); + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING); +} +MDEV_TYPE_ATTR_RO(device_api); + +static ssize_t available_instances_show(struct kobject *kobj, + struct device *dev, char *buf) +{ + struct vfio_ccw_private *private = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", atomic_read(&private->avail)); +} +MDEV_TYPE_ATTR_RO(available_instances); + +static struct attribute *mdev_types_attrs[] = { + &mdev_type_attr_name.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_available_instances.attr, + NULL, +}; + +static struct attribute_group mdev_type_group = { + .name = "io", + .attrs = mdev_types_attrs, +}; + +struct attribute_group *mdev_type_groups[] = { + &mdev_type_group, + NULL, +}; + +static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev) +{ + struct vfio_ccw_private *private = + dev_get_drvdata(mdev_parent_dev(mdev)); + + if (private->state == VFIO_CCW_STATE_NOT_OPER) + return -ENODEV; + + if (atomic_dec_if_positive(&private->avail) < 0) + return -EPERM; + + private->mdev = mdev; + private->state = VFIO_CCW_STATE_IDLE; + + return 0; +} + +static int vfio_ccw_mdev_remove(struct mdev_device *mdev) +{ + struct vfio_ccw_private *private = + dev_get_drvdata(mdev_parent_dev(mdev)); + int ret; + + if (!private) + goto out; + + if ((private->state == VFIO_CCW_STATE_NOT_OPER) || + (private->state == VFIO_CCW_STATE_STANDBY)) + goto out; + + ret = vfio_ccw_mdev_reset(mdev); + if (ret) + return ret; + + private->state = VFIO_CCW_STATE_STANDBY; + +out: + private->mdev = NULL; + atomic_inc(&private->avail); + + return 0; +} + +static int vfio_ccw_mdev_open(struct mdev_device *mdev) +{ + struct vfio_ccw_private *private = + dev_get_drvdata(mdev_parent_dev(mdev)); + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; + + private->nb.notifier_call = vfio_ccw_mdev_notifier; + + return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + &events, &private->nb); +} + +void vfio_ccw_mdev_release(struct mdev_device *mdev) +{ + struct vfio_ccw_private *private = + dev_get_drvdata(mdev_parent_dev(mdev)); + + vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + &private->nb); +} + +static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev, + char __user *buf, + size_t count, + loff_t *ppos) +{ + struct vfio_ccw_private *private; + struct ccw_io_region *region; + + if (*ppos + count > sizeof(*region)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + region = &private->io_region; + if (copy_to_user(buf, (void *)region + *ppos, count)) + return -EFAULT; + + return count; +} + +static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + struct vfio_ccw_private *private; + struct ccw_io_region *region; + + if (*ppos + count > sizeof(*region)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + if (private->state != VFIO_CCW_STATE_IDLE) + return -EACCES; + + region = &private->io_region; + if (copy_from_user((void *)region + *ppos, buf, count)) + return -EFAULT; + + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ); + if (region->ret_code != 0) { + private->state = VFIO_CCW_STATE_IDLE; + return region->ret_code; + } + + return count; +} + +static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info) +{ + info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET; + info->num_regions = VFIO_CCW_NUM_REGIONS; + info->num_irqs = VFIO_CCW_NUM_IRQS; + + return 0; +} + +static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info, + u16 *cap_type_id, + void **cap_type) +{ + switch (info->index) { + case VFIO_CCW_CONFIG_REGION_INDEX: + info->offset = 0; + info->size = sizeof(struct ccw_io_region); + info->flags = VFIO_REGION_INFO_FLAG_READ + | VFIO_REGION_INFO_FLAG_WRITE; + return 0; + default: + return -EINVAL; + } +} + +int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info) +{ + if (info->index != VFIO_CCW_IO_IRQ_INDEX) + return -EINVAL; + + info->count = 1; + info->flags = VFIO_IRQ_INFO_EVENTFD; + + return 0; +} + +static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev, + uint32_t flags, + void __user *data) +{ + struct vfio_ccw_private *private; + struct eventfd_ctx **ctx; + + if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER)) + return -EINVAL; + + private = dev_get_drvdata(mdev_parent_dev(mdev)); + if (!private) + return -ENODEV; + + ctx = &private->io_trigger; + + switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { + case VFIO_IRQ_SET_DATA_NONE: + { + if (*ctx) + eventfd_signal(*ctx, 1); + return 0; + } + case VFIO_IRQ_SET_DATA_BOOL: + { + uint8_t trigger; + + if (get_user(trigger, (uint8_t __user *)data)) + return -EFAULT; + + if (trigger && *ctx) + eventfd_signal(*ctx, 1); + return 0; + } + case VFIO_IRQ_SET_DATA_EVENTFD: + { + int32_t fd; + + if (get_user(fd, (int32_t __user *)data)) + return -EFAULT; + + if (fd == -1) { + if (*ctx) + eventfd_ctx_put(*ctx); + *ctx = NULL; + } else if (fd >= 0) { + struct eventfd_ctx *efdctx; + + efdctx = eventfd_ctx_fdget(fd); + if (IS_ERR(efdctx)) + return PTR_ERR(efdctx); + + if (*ctx) + eventfd_ctx_put(*ctx); + + *ctx = efdctx; + } else + return -EINVAL; + + return 0; + } + default: + return -EINVAL; + } +} + +static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, + unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + unsigned long minsz; + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = vfio_ccw_mdev_get_device_info(&info); + if (ret) + return ret; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_GET_REGION_INFO: + { + struct vfio_region_info info; + u16 cap_type_id = 0; + void *cap_type = NULL; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id, + &cap_type); + if (ret) + return ret; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_GET_IRQ_INFO: + { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS) + return -EINVAL; + + ret = vfio_ccw_mdev_get_irq_info(&info); + if (ret) + return ret; + + if (info.count == -1) + return -EINVAL; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_SET_IRQS: + { + struct vfio_irq_set hdr; + size_t data_size; + void __user *data; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + ret = vfio_set_irqs_validate_and_prepare(&hdr, 1, + VFIO_CCW_NUM_IRQS, + &data_size); + if (ret) + return ret; + + data = (void __user *)(arg + minsz); + return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data); + } + case VFIO_DEVICE_RESET: + return vfio_ccw_mdev_reset(mdev); + default: + return -ENOTTY; + } +} + +static const struct mdev_parent_ops vfio_ccw_mdev_ops = { + .owner = THIS_MODULE, + .supported_type_groups = mdev_type_groups, + .create = vfio_ccw_mdev_create, + .remove = vfio_ccw_mdev_remove, + .open = vfio_ccw_mdev_open, + .release = vfio_ccw_mdev_release, + .read = vfio_ccw_mdev_read, + .write = vfio_ccw_mdev_write, + .ioctl = vfio_ccw_mdev_ioctl, +}; + +int vfio_ccw_mdev_reg(struct subchannel *sch) +{ + return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops); +} + +void vfio_ccw_mdev_unreg(struct subchannel *sch) +{ + mdev_unregister_device(&sch->dev); +} diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h new file mode 100644 index 000000000000..fc0f01c16ef9 --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -0,0 +1,96 @@ +/* + * Private stuff for vfio_ccw driver + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + */ + +#ifndef _VFIO_CCW_PRIVATE_H_ +#define _VFIO_CCW_PRIVATE_H_ + +#include <linux/completion.h> +#include <linux/eventfd.h> +#include <linux/workqueue.h> +#include <linux/vfio_ccw.h> + +#include "css.h" +#include "vfio_ccw_cp.h" + +/** + * struct vfio_ccw_private + * @sch: pointer to the subchannel + * @state: internal state of the device + * @completion: synchronization helper of the I/O completion + * @avail: available for creating a mediated device + * @mdev: pointer to the mediated device + * @nb: notifier for vfio events + * @io_region: MMIO region to input/output I/O arguments/results + * @cp: channel program for the current I/O operation + * @irb: irb info received from interrupt + * @scsw: scsw info + * @io_trigger: eventfd ctx for signaling userspace I/O results + * @io_work: work for deferral process of I/O handling + */ +struct vfio_ccw_private { + struct subchannel *sch; + int state; + struct completion *completion; + atomic_t avail; + struct mdev_device *mdev; + struct notifier_block nb; + struct ccw_io_region io_region; + + struct channel_program cp; + struct irb irb; + union scsw scsw; + + struct eventfd_ctx *io_trigger; + struct work_struct io_work; +} __aligned(8); + +extern int vfio_ccw_mdev_reg(struct subchannel *sch); +extern void vfio_ccw_mdev_unreg(struct subchannel *sch); + +extern int vfio_ccw_sch_quiesce(struct subchannel *sch); + +/* + * States of the device statemachine. + */ +enum vfio_ccw_state { + VFIO_CCW_STATE_NOT_OPER, + VFIO_CCW_STATE_STANDBY, + VFIO_CCW_STATE_IDLE, + VFIO_CCW_STATE_BOXED, + VFIO_CCW_STATE_BUSY, + /* last element! */ + NR_VFIO_CCW_STATES +}; + +/* + * Asynchronous events of the device statemachine. + */ +enum vfio_ccw_event { + VFIO_CCW_EVENT_NOT_OPER, + VFIO_CCW_EVENT_IO_REQ, + VFIO_CCW_EVENT_INTERRUPT, + /* last element! */ + NR_VFIO_CCW_EVENTS +}; + +/* + * Action called through jumptable. + */ +typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event); +extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS]; + +static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, + int event) +{ + vfio_ccw_jumptable[private->state][event](private, event); +} + +extern struct workqueue_struct *vfio_ccw_work_q; + +#endif |