diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 08:46:32 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 08:46:32 -0700 |
commit | 08d183e3c1f650b4db1d07d764502116861542fa (patch) | |
tree | f868a813f36744597bc7a8260c63cd37a3a94338 /include | |
parent | 4b1f2af6752a4cc9acc1c22ddf3842478965f113 (diff) | |
parent | 6096f884515466f400864ad23d16f20b731a7ce7 (diff) | |
download | lwn-08d183e3c1f650b4db1d07d764502116861542fa.tar.gz lwn-08d183e3c1f650b4db1d07d764502116861542fa.zip |
Merge tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc updates from Michael Ellerman:
- disable the 32-bit vdso when building LE, so we can build with a
64-bit only toolchain.
- EEH fixes from Gavin & Richard.
- enable the sys_kcmp syscall from Laurent.
- sysfs control for fastsleep workaround from Shreyas.
- expose OPAL events as an irq chip by Alistair.
- MSI ops moved to pci_controller_ops by Daniel.
- fix for kernel to userspace backtraces for perf from Anton.
- merge pseries and pseries_le defconfigs from Cyril.
- CXL in-kernel API from Mikey.
- OPAL prd driver from Jeremy.
- fix for DSCR handling & tests from Anshuman.
- Powernv flash mtd driver from Cyril.
- dynamic DMA Window support on powernv from Alexey.
- LLVM clang fixes & workarounds from Anton.
- reworked version of the patch to abort syscalls when transactional.
- fix the swap encoding to support 4TB, from Aneesh.
- various fixes as usual.
- Freescale updates from Scott: Highlights include more 8xx
optimizations, an e6500 hugetlb optimization, QMan device tree nodes,
t1024/t1023 support, and various fixes and cleanup.
* tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (180 commits)
cxl: Fix typo in debug print
cxl: Add CXL_KERNEL_API config option
powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma()
powerpc/mm: Change the swap encoding in pte.
powerpc/mm: PTE_RPN_MAX is not used, remove the same
powerpc/tm: Abort syscalls in active transactions
powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off
powerpc/include: Add opal-prd to installed uapi headers
powerpc/powernv: fix construction of opal PRD messages
powerpc/powernv: Increase opal-irqchip initcall priority
powerpc: Make doorbell check preemption safe
powerpc/powernv: pnv_init_idle_states() should only run on powernv
macintosh/nvram: Remove as unused
powerpc: Don't use gcc specific options on clang
powerpc: Don't use -mno-strict-align on clang
powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it
powerpc: Only use -mabi=altivec if toolchain supports it
powerpc: Fix duplicate const clang warning in user access code
vfio: powerpc/spapr: Support Dynamic DMA windows
vfio: powerpc/spapr: Register memory and define IOMMU v2
...
Diffstat (limited to 'include')
-rw-r--r-- | include/misc/cxl-base.h | 48 | ||||
-rw-r--r-- | include/misc/cxl.h | 207 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 102 | ||||
-rw-r--r-- | include/uapi/misc/cxl.h | 22 |
4 files changed, 349 insertions, 30 deletions
diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h new file mode 100644 index 000000000000..5ae962512fb8 --- /dev/null +++ b/include/misc/cxl-base.h @@ -0,0 +1,48 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MISC_CXL_BASE_H +#define _MISC_CXL_BASE_H + +#ifdef CONFIG_CXL_BASE + +#define CXL_IRQ_RANGES 4 + +struct cxl_irq_ranges { + irq_hw_number_t offset[CXL_IRQ_RANGES]; + irq_hw_number_t range[CXL_IRQ_RANGES]; +}; + +extern atomic_t cxl_use_count; + +static inline bool cxl_ctx_in_use(void) +{ + return (atomic_read(&cxl_use_count) != 0); +} + +static inline void cxl_ctx_get(void) +{ + atomic_inc(&cxl_use_count); +} + +static inline void cxl_ctx_put(void) +{ + atomic_dec(&cxl_use_count); +} + +void cxl_slbia(struct mm_struct *mm); + +#else /* CONFIG_CXL_BASE */ + +static inline bool cxl_ctx_in_use(void) { return false; } +static inline void cxl_slbia(struct mm_struct *mm) {} + +#endif /* CONFIG_CXL_BASE */ + +#endif diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 975cc7861f18..7a6c1d6cc173 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 IBM Corp. + * Copyright 2015 IBM Corp. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,39 +10,194 @@ #ifndef _MISC_CXL_H #define _MISC_CXL_H -#ifdef CONFIG_CXL_BASE +#include <linux/pci.h> +#include <linux/poll.h> +#include <linux/interrupt.h> +#include <uapi/misc/cxl.h> -#define CXL_IRQ_RANGES 4 +/* + * This documents the in kernel API for driver to use CXL. It allows kernel + * drivers to bind to AFUs using an AFU configuration record exposed as a PCI + * configuration record. + * + * This API enables control over AFU and contexts which can't be part of the + * generic PCI API. This API is agnostic to the actual AFU. + */ + +/* Get the AFU associated with a pci_dev */ +struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); + +/* Get the AFU conf record number associated with a pci_dev */ +unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); + +/* Get the physical device (ie. the PCIe card) which the AFU is attached */ +struct device *cxl_get_phys_dev(struct pci_dev *dev); + + +/* + * Context lifetime overview: + * + * An AFU context may be inited and then started and stoppped multiple times + * before it's released. ie. + * - cxl_dev_context_init() + * - cxl_start_context() + * - cxl_stop_context() + * - cxl_start_context() + * - cxl_stop_context() + * ...repeat... + * - cxl_release_context() + * Once released, a context can't be started again. + * + * One context is inited by the cxl driver for every pci_dev. This is to be + * used as a default kernel context. cxl_get_context() will get this + * context. This context will be released by PCI hot unplug, so doesn't need to + * be released explicitly by drivers. + * + * Additional kernel contexts may be inited using cxl_dev_context_init(). + * These must be released using cxl_context_detach(). + * + * Once a context has been inited, IRQs may be configured. Firstly these IRQs + * must be allocated (cxl_allocate_afu_irqs()), then individually mapped to + * specific handlers (cxl_map_afu_irq()). + * + * These IRQs can be unmapped (cxl_unmap_afu_irq()) and finally released + * (cxl_free_afu_irqs()). + * + * The AFU can be reset (cxl_afu_reset()). This will cause the PSL/AFU + * hardware to lose track of all contexts. It's upto the caller of + * cxl_afu_reset() to restart these contexts. + */ + +/* + * On pci_enabled_device(), the cxl driver will init a single cxl context for + * use by the driver. It doesn't start this context (as that will likely + * generate DMA traffic for most AFUs). + * + * This gets the default context associated with this pci_dev. This context + * doesn't need to be released as this will be done by the PCI subsystem on hot + * unplug. + */ +struct cxl_context *cxl_get_context(struct pci_dev *dev); +/* + * Allocate and initalise a context associated with a AFU PCI device. This + * doesn't start the context in the AFU. + */ +struct cxl_context *cxl_dev_context_init(struct pci_dev *dev); +/* + * Release and free a context. Context should be stopped before calling. + */ +int cxl_release_context(struct cxl_context *ctx); -struct cxl_irq_ranges { - irq_hw_number_t offset[CXL_IRQ_RANGES]; - irq_hw_number_t range[CXL_IRQ_RANGES]; -}; +/* + * Allocate AFU interrupts for this context. num=0 will allocate the default + * for this AFU as given in the AFU descriptor. This number doesn't include the + * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be + * used must map a handler with cxl_map_afu_irq. + */ +int cxl_allocate_afu_irqs(struct cxl_context *cxl, int num); +/* Free allocated interrupts */ +void cxl_free_afu_irqs(struct cxl_context *cxl); + +/* + * Map a handler for an AFU interrupt associated with a particular context. AFU + * IRQS numbers start from 1 (CAIA defines AFU IRQ 0 for page faults). cookie + * is private data is that will be provided to the interrupt handler. + */ +int cxl_map_afu_irq(struct cxl_context *cxl, int num, + irq_handler_t handler, void *cookie, char *name); +/* unmap mapped IRQ handlers */ +void cxl_unmap_afu_irq(struct cxl_context *cxl, int num, void *cookie); -extern atomic_t cxl_use_count; +/* + * Start work on the AFU. This starts an cxl context and associates it with a + * task. task == NULL will make it a kernel context. + */ +int cxl_start_context(struct cxl_context *ctx, u64 wed, + struct task_struct *task); +/* + * Stop a context and remove it from the PSL + */ +int cxl_stop_context(struct cxl_context *ctx); -static inline bool cxl_ctx_in_use(void) -{ - return (atomic_read(&cxl_use_count) != 0); -} +/* Reset the AFU */ +int cxl_afu_reset(struct cxl_context *ctx); -static inline void cxl_ctx_get(void) -{ - atomic_inc(&cxl_use_count); -} +/* + * Set a context as a master context. + * This sets the default problem space area mapped as the full space, rather + * than just the per context area (for slaves). + */ +void cxl_set_master(struct cxl_context *ctx); -static inline void cxl_ctx_put(void) -{ - atomic_dec(&cxl_use_count); -} +/* + * Map and unmap the AFU Problem Space area. The amount and location mapped + * depends on if this context is a master or slave. + */ +void __iomem *cxl_psa_map(struct cxl_context *ctx); +void cxl_psa_unmap(void __iomem *addr); -void cxl_slbia(struct mm_struct *mm); +/* Get the process element for this context */ +int cxl_process_element(struct cxl_context *ctx); -#else /* CONFIG_CXL_BASE */ -static inline bool cxl_ctx_in_use(void) { return false; } -static inline void cxl_slbia(struct mm_struct *mm) {} +/* + * These calls allow drivers to create their own file descriptors and make them + * identical to the cxl file descriptor user API. An example use case: + * + * struct file_operations cxl_my_fops = {}; + * ...... + * // Init the context + * ctx = cxl_dev_context_init(dev); + * if (IS_ERR(ctx)) + * return PTR_ERR(ctx); + * // Create and attach a new file descriptor to my file ops + * file = cxl_get_fd(ctx, &cxl_my_fops, &fd); + * // Start context + * rc = cxl_start_work(ctx, &work.work); + * if (rc) { + * fput(file); + * put_unused_fd(fd); + * return -ENODEV; + * } + * // No error paths after installing the fd + * fd_install(fd, file); + * return fd; + * + * This inits a context, and gets a file descriptor and associates some file + * ops to that file descriptor. If the file ops are blank, the cxl driver will + * fill them in with the default ones that mimic the standard user API. Once + * completed, the file descriptor can be installed. Once the file descriptor is + * installed, it's visible to the user so no errors must occur past this point. + * + * If cxl_fd_release() file op call is installed, the context will be stopped + * and released when the fd is released. Hence the driver won't need to manage + * this itself. + */ -#endif /* CONFIG_CXL_BASE */ +/* + * Take a context and associate it with my file ops. Returns the associated + * file and file descriptor. Any file ops which are blank are filled in by the + * cxl driver with the default ops to mimic the standard API. + */ +struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, + int *fd); +/* Get the context associated with this file */ +struct cxl_context *cxl_fops_get_context(struct file *file); +/* + * Start a context associated a struct cxl_ioctl_start_work used by the + * standard cxl user API. + */ +int cxl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work *work); +/* + * Export all the existing fops so drivers can use them + */ +int cxl_fd_open(struct inode *inode, struct file *file); +int cxl_fd_release(struct inode *inode, struct file *file); +long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm); +unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll); +ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, + loff_t *off); -#endif +#endif /* _MISC_CXL_H */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index b57b750c222f..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -443,6 +445,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + +/* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into * the hardware so the guest has to know that information. @@ -452,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -470,12 +492,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -492,9 +525,70 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. + */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index cd6d789b73ec..99a8ca15fe64 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -32,10 +32,32 @@ struct cxl_ioctl_start_work { #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS) + +/* Possible modes that an afu can be in */ +#define CXL_MODE_DEDICATED 0x1 +#define CXL_MODE_DIRECTED 0x2 + +/* possible flags for the cxl_afu_id flags field */ +#define CXL_AFUID_FLAG_SLAVE 0x1 /* In directed-mode afu is in slave mode */ + +struct cxl_afu_id { + __u64 flags; /* One of CXL_AFUID_FLAG_X */ + __u32 card_id; + __u32 afu_offset; + __u32 afu_mode; /* one of the CXL_MODE_X */ + __u32 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; + __u64 reserved5; + __u64 reserved6; +}; + /* ioctl numbers */ #define CXL_MAGIC 0xCA #define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) #define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) +#define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id) #define CXL_READ_MIN_SIZE 0x1000 /* 4K */ |