diff options
Diffstat (limited to 'drivers')
533 files changed, 43563 insertions, 7711 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 954824438fd1..53abb4a5f736 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -138,6 +138,7 @@ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ +obj-$(CONFIG_VHOST) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index aebd944bdaa1..445ce28475b3 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -221,6 +221,9 @@ config ACPI_PROCESSOR_IDLE bool select CPU_IDLE +config ACPI_MCFG + bool + config ACPI_CPPC_LIB bool depends on ACPI_PROCESSOR diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 35a6ccbe3025..5ae9d85c5159 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -40,6 +40,7 @@ acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-y += pci_root.o pci_link.o pci_irq.o +obj-$(CONFIG_ACPI_MCFG) += pci_mcfg.o acpi-y += acpi_lpss.o acpi_apd.o acpi-y += acpi_platform.o acpi-y += acpi_pnp.o diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c index 81dc75033f15..0980a133916f 100644 --- a/drivers/acpi/acpi_cmos_rtc.c +++ b/drivers/acpi/acpi_cmos_rtc.c @@ -14,7 +14,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm-generic/rtc.h> +#include <linux/mc146818rtc.h> #include "internal.h" diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 148f4e5ca104..31abb0bdd4f2 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -232,8 +232,10 @@ remove_dev_dir: acpi_device_dir(device) = NULL; remove_lid_dir: remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir); + acpi_lid_dir = NULL; remove_button_dir: remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir); + acpi_button_dir = NULL; goto done; } @@ -250,7 +252,9 @@ static int acpi_button_remove_fs(struct acpi_device *device) acpi_lid_dir); acpi_device_dir(device) = NULL; remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir); + acpi_lid_dir = NULL; remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir); + acpi_button_dir = NULL; return 0; } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 999a10914678..e7bd57cc550a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -101,6 +101,7 @@ enum ec_command { #define ACPI_EC_UDELAY_POLL 550 /* Wait 1ms for EC transaction polling */ #define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query * when trying to clear the EC */ +#define ACPI_EC_MAX_QUERIES 16 /* Maximum number of parallel queries */ enum { EC_FLAGS_QUERY_PENDING, /* Query is pending */ @@ -121,6 +122,10 @@ static unsigned int ec_delay __read_mostly = ACPI_EC_DELAY; module_param(ec_delay, uint, 0644); MODULE_PARM_DESC(ec_delay, "Timeout(ms) waited until an EC command completes"); +static unsigned int ec_max_queries __read_mostly = ACPI_EC_MAX_QUERIES; +module_param(ec_max_queries, uint, 0644); +MODULE_PARM_DESC(ec_max_queries, "Maximum parallel _Qxx evaluations"); + static bool ec_busy_polling __read_mostly; module_param(ec_busy_polling, bool, 0644); MODULE_PARM_DESC(ec_busy_polling, "Use busy polling to advance EC transaction"); @@ -174,6 +179,7 @@ static void acpi_ec_event_processor(struct work_struct *work); struct acpi_ec *boot_ec, *first_ec; EXPORT_SYMBOL(first_ec); +static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ @@ -1098,7 +1104,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) * work queue execution. */ ec_dbg_evt("Query(0x%02x) scheduled", value); - if (!schedule_work(&q->work)) { + if (!queue_work(ec_query_wq, &q->work)) { ec_dbg_evt("Query(0x%02x) overlapped", value); result = -EBUSY; } @@ -1660,15 +1666,41 @@ static struct acpi_driver acpi_ec_driver = { }, }; +static inline int acpi_ec_query_init(void) +{ + if (!ec_query_wq) { + ec_query_wq = alloc_workqueue("kec_query", 0, + ec_max_queries); + if (!ec_query_wq) + return -ENODEV; + } + return 0; +} + +static inline void acpi_ec_query_exit(void) +{ + if (ec_query_wq) { + destroy_workqueue(ec_query_wq); + ec_query_wq = NULL; + } +} + int __init acpi_ec_init(void) { - int result = 0; + int result; + /* register workqueue for _Qxx evaluations */ + result = acpi_ec_query_init(); + if (result) + goto err_exit; /* Now register the driver for the EC */ result = acpi_bus_register_driver(&acpi_ec_driver); - if (result < 0) - return -ENODEV; + if (result) + goto err_exit; +err_exit: + if (result) + acpi_ec_query_exit(); return result; } @@ -1678,5 +1710,6 @@ static void __exit acpi_ec_exit(void) { acpi_bus_unregister_driver(&acpi_ec_driver); + acpi_ec_query_exit(); } #endif /* 0 */ diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index b108f1358a32..4305ee9db4b2 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -309,7 +309,7 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) * During early init (when acpi_gbl_permanent_mmap has not been set yet) this * routine simply calls __acpi_map_table() to get the job done. */ -void __iomem *__init_refok +void __iomem *__ref acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; @@ -362,8 +362,7 @@ out: } EXPORT_SYMBOL_GPL(acpi_os_map_iomem); -void *__init_refok -acpi_os_map_memory(acpi_physical_address phys, acpi_size size) +void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) { return (void *)acpi_os_map_iomem(phys, size); } diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c new file mode 100644 index 000000000000..b5b376e081f5 --- /dev/null +++ b/drivers/acpi/pci_mcfg.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 Broadcom + * Author: Jayachandran C <jchandra@broadcom.com> + * Copyright (C) 2016 Semihalf + * Author: Tomasz Nowicki <tn@semihalf.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#define pr_fmt(fmt) "ACPI: " fmt + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/pci-acpi.h> + +/* Structure to hold entries from the MCFG table */ +struct mcfg_entry { + struct list_head list; + phys_addr_t addr; + u16 segment; + u8 bus_start; + u8 bus_end; +}; + +/* List to save MCFG entries */ +static LIST_HEAD(pci_mcfg_list); + +phys_addr_t pci_mcfg_lookup(u16 seg, struct resource *bus_res) +{ + struct mcfg_entry *e; + + /* + * We expect exact match, unless MCFG entry end bus covers more than + * specified by caller. + */ + list_for_each_entry(e, &pci_mcfg_list, list) { + if (e->segment == seg && e->bus_start == bus_res->start && + e->bus_end >= bus_res->end) + return e->addr; + } + + return 0; +} + +static __init int pci_mcfg_parse(struct acpi_table_header *header) +{ + struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *mptr; + struct mcfg_entry *e, *arr; + int i, n; + + if (header->length < sizeof(struct acpi_table_mcfg)) + return -EINVAL; + + n = (header->length - sizeof(struct acpi_table_mcfg)) / + sizeof(struct acpi_mcfg_allocation); + mcfg = (struct acpi_table_mcfg *)header; + mptr = (struct acpi_mcfg_allocation *) &mcfg[1]; + + arr = kcalloc(n, sizeof(*arr), GFP_KERNEL); + if (!arr) + return -ENOMEM; + + for (i = 0, e = arr; i < n; i++, mptr++, e++) { + e->segment = mptr->pci_segment; + e->addr = mptr->address; + e->bus_start = mptr->start_bus_number; + e->bus_end = mptr->end_bus_number; + list_add(&e->list, &pci_mcfg_list); + } + + pr_info("MCFG table detected, %d entries\n", n); + return 0; +} + +/* Interface called by ACPI - parse and save MCFG table */ +void __init pci_mmcfg_late_init(void) +{ + int err = acpi_table_parse(ACPI_SIG_MCFG, pci_mcfg_parse); + if (err) + pr_err("Failed to parse MCFG (%d)\n", err); +} diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index ae3fe4e64203..d144168d4ef9 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -720,6 +720,36 @@ next: } } +static void acpi_pci_root_remap_iospace(struct resource_entry *entry) +{ +#ifdef PCI_IOBASE + struct resource *res = entry->res; + resource_size_t cpu_addr = res->start; + resource_size_t pci_addr = cpu_addr - entry->offset; + resource_size_t length = resource_size(res); + unsigned long port; + + if (pci_register_io_range(cpu_addr, length)) + goto err; + + port = pci_address_to_pio(cpu_addr); + if (port == (unsigned long)-1) + goto err; + + res->start = port; + res->end = port + length - 1; + entry->offset = port - pci_addr; + + if (pci_remap_iospace(res, cpu_addr) < 0) + goto err; + + pr_info("Remapped I/O %pa to %pR\n", &cpu_addr, res); + return; +err: + res->flags |= IORESOURCE_DISABLED; +#endif +} + int acpi_pci_probe_root_resources(struct acpi_pci_root_info *info) { int ret; @@ -740,6 +770,9 @@ int acpi_pci_probe_root_resources(struct acpi_pci_root_info *info) "no IO and memory resources present in _CRS\n"); else { resource_list_for_each_entry_safe(entry, tmp, list) { + if (entry->res->flags & IORESOURCE_IO) + acpi_pci_root_remap_iospace(entry); + if (entry->res->flags & IORESOURCE_DISABLED) resource_list_destroy_entry(entry); else @@ -811,6 +844,8 @@ static void acpi_pci_root_release_info(struct pci_host_bridge *bridge) resource_list_for_each_entry(entry, &bridge->windows) { res = entry->res; + if (res->flags & IORESOURCE_IO) + pci_unmap_iospace(res); if (res->parent && (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) release_resource(res); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 773fc3099769..22d1760a4278 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -46,7 +46,8 @@ MODULE_LICENSE("GPL"); extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; -static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, + void *buf, size_t size) { struct builtin_fw *b_fw; @@ -54,6 +55,9 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; + + if (buf && fw->size <= size) + memcpy(buf, fw->data, fw->size); return true; } } @@ -74,7 +78,9 @@ static bool fw_is_builtin_firmware(const struct firmware *fw) #else /* Module case - no builtin firmware support */ -static inline bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static inline bool fw_get_builtin_firmware(struct firmware *fw, + const char *name, void *buf, + size_t size) { return false; } @@ -112,6 +118,7 @@ static inline long firmware_loading_timeout(void) #define FW_OPT_FALLBACK 0 #endif #define FW_OPT_NO_WARN (1U << 3) +#define FW_OPT_NOCACHE (1U << 4) struct firmware_cache { /* firmware_buf instance will be added into the below list */ @@ -143,6 +150,7 @@ struct firmware_buf { unsigned long status; void *data; size_t size; + size_t allocated_size; #ifdef CONFIG_FW_LOADER_USER_HELPER bool is_paged_buf; bool need_uevent; @@ -178,7 +186,8 @@ static DEFINE_MUTEX(fw_lock); static struct firmware_cache fw_cache; static struct firmware_buf *__allocate_fw_buf(const char *fw_name, - struct firmware_cache *fwc) + struct firmware_cache *fwc, + void *dbuf, size_t size) { struct firmware_buf *buf; @@ -194,6 +203,8 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, kref_init(&buf->ref); buf->fwc = fwc; + buf->data = dbuf; + buf->allocated_size = size; init_completion(&buf->completion); #ifdef CONFIG_FW_LOADER_USER_HELPER INIT_LIST_HEAD(&buf->pending_list); @@ -217,7 +228,8 @@ static struct firmware_buf *__fw_lookup_buf(const char *fw_name) static int fw_lookup_and_allocate_buf(const char *fw_name, struct firmware_cache *fwc, - struct firmware_buf **buf) + struct firmware_buf **buf, void *dbuf, + size_t size) { struct firmware_buf *tmp; @@ -229,7 +241,7 @@ static int fw_lookup_and_allocate_buf(const char *fw_name, *buf = tmp; return 1; } - tmp = __allocate_fw_buf(fw_name, fwc); + tmp = __allocate_fw_buf(fw_name, fwc, dbuf, size); if (tmp) list_add(&tmp->list, &fwc->head); spin_unlock(&fwc->lock); @@ -261,6 +273,7 @@ static void __fw_free_buf(struct kref *ref) vfree(buf->pages); } else #endif + if (!buf->allocated_size) vfree(buf->data); kfree_const(buf->fw_id); kfree(buf); @@ -301,13 +314,21 @@ static void fw_finish_direct_load(struct device *device, mutex_unlock(&fw_lock); } -static int fw_get_filesystem_firmware(struct device *device, - struct firmware_buf *buf) +static int +fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) { loff_t size; int i, len; int rc = -ENOENT; char *path; + enum kernel_read_file_id id = READING_FIRMWARE; + size_t msize = INT_MAX; + + /* Already populated data member means we're loading into a buffer */ + if (buf->data) { + id = READING_FIRMWARE_PREALLOC_BUFFER; + msize = buf->allocated_size; + } path = __getname(); if (!path) @@ -326,8 +347,8 @@ static int fw_get_filesystem_firmware(struct device *device, } buf->size = 0; - rc = kernel_read_file_from_path(path, &buf->data, &size, - INT_MAX, READING_FIRMWARE); + rc = kernel_read_file_from_path(path, &buf->data, &size, msize, + id); if (rc) { if (rc == -ENOENT) dev_dbg(device, "loading %s failed with error %d\n", @@ -691,6 +712,38 @@ out: static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); +static void firmware_rw_buf(struct firmware_buf *buf, char *buffer, + loff_t offset, size_t count, bool read) +{ + if (read) + memcpy(buffer, buf->data + offset, count); + else + memcpy(buf->data + offset, buffer, count); +} + +static void firmware_rw(struct firmware_buf *buf, char *buffer, + loff_t offset, size_t count, bool read) +{ + while (count) { + void *page_data; + int page_nr = offset >> PAGE_SHIFT; + int page_ofs = offset & (PAGE_SIZE-1); + int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); + + page_data = kmap(buf->pages[page_nr]); + + if (read) + memcpy(buffer, page_data + page_ofs, page_cnt); + else + memcpy(page_data + page_ofs, buffer, page_cnt); + + kunmap(buf->pages[page_nr]); + buffer += page_cnt; + offset += page_cnt; + count -= page_cnt; + } +} + static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) @@ -715,21 +768,11 @@ static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, ret_count = count; - while (count) { - void *page_data; - int page_nr = offset >> PAGE_SHIFT; - int page_ofs = offset & (PAGE_SIZE-1); - int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); - - page_data = kmap(buf->pages[page_nr]); - - memcpy(buffer, page_data + page_ofs, page_cnt); + if (buf->data) + firmware_rw_buf(buf, buffer, offset, count, true); + else + firmware_rw(buf, buffer, offset, count, true); - kunmap(buf->pages[page_nr]); - buffer += page_cnt; - offset += page_cnt; - count -= page_cnt; - } out: mutex_unlock(&fw_lock); return ret_count; @@ -804,29 +847,23 @@ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, goto out; } - retval = fw_realloc_buffer(fw_priv, offset + count); - if (retval) - goto out; - - retval = count; - - while (count) { - void *page_data; - int page_nr = offset >> PAGE_SHIFT; - int page_ofs = offset & (PAGE_SIZE - 1); - int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); - - page_data = kmap(buf->pages[page_nr]); - - memcpy(page_data + page_ofs, buffer, page_cnt); + if (buf->data) { + if (offset + count > buf->allocated_size) { + retval = -ENOMEM; + goto out; + } + firmware_rw_buf(buf, buffer, offset, count, false); + retval = count; + } else { + retval = fw_realloc_buffer(fw_priv, offset + count); + if (retval) + goto out; - kunmap(buf->pages[page_nr]); - buffer += page_cnt; - offset += page_cnt; - count -= page_cnt; + retval = count; + firmware_rw(buf, buffer, offset, count, false); } - buf->size = max_t(size_t, offset, buf->size); + buf->size = max_t(size_t, offset + count, buf->size); out: mutex_unlock(&fw_lock); return retval; @@ -894,7 +931,8 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, struct firmware_buf *buf = fw_priv->buf; /* fall back on userspace loading */ - buf->is_paged_buf = true; + if (!buf->data) + buf->is_paged_buf = true; dev_set_uevent_suppress(f_dev, true); @@ -929,7 +967,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, if (is_fw_load_aborted(buf)) retval = -EAGAIN; - else if (!buf->data) + else if (buf->is_paged_buf && !buf->data) retval = -ENOMEM; device_del(f_dev); @@ -1012,7 +1050,7 @@ static int sync_cached_firmware_buf(struct firmware_buf *buf) */ static int _request_firmware_prepare(struct firmware **firmware_p, const char *name, - struct device *device) + struct device *device, void *dbuf, size_t size) { struct firmware *firmware; struct firmware_buf *buf; @@ -1025,12 +1063,12 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, return -ENOMEM; } - if (fw_get_builtin_firmware(firmware, name)) { + if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } - ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf); + ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf, dbuf, size); /* * bind with 'buf' now to avoid warning in failure path @@ -1070,14 +1108,16 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device, * should be fixed in devres or driver core. */ /* don't cache firmware handled without uevent */ - if (device && (opt_flags & FW_OPT_UEVENT)) + if (device && (opt_flags & FW_OPT_UEVENT) && + !(opt_flags & FW_OPT_NOCACHE)) fw_add_devm_name(device, buf->fw_id); /* * After caching firmware image is started, let it piggyback * on request firmware. */ - if (buf->fwc->state == FW_LOADER_START_CACHE) { + if (!(opt_flags & FW_OPT_NOCACHE) && + buf->fwc->state == FW_LOADER_START_CACHE) { if (fw_cache_piggyback_on_request(buf->fw_id)) kref_get(&buf->ref); } @@ -1091,7 +1131,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device, /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, - struct device *device, unsigned int opt_flags) + struct device *device, void *buf, size_t size, + unsigned int opt_flags) { struct firmware *fw = NULL; long timeout; @@ -1105,7 +1146,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } - ret = _request_firmware_prepare(&fw, name, device); + ret = _request_firmware_prepare(&fw, name, device, buf, size); if (ret <= 0) /* error or already assigned */ goto out; @@ -1184,7 +1225,7 @@ request_firmware(const struct firmware **firmware_p, const char *name, /* Need to pin this module until return */ __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_FALLBACK); module_put(THIS_MODULE); return ret; @@ -1208,7 +1249,7 @@ int request_firmware_direct(const struct firmware **firmware_p, int ret; __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; @@ -1216,6 +1257,36 @@ int request_firmware_direct(const struct firmware **firmware_p, EXPORT_SYMBOL_GPL(request_firmware_direct); /** + * request_firmware_into_buf - load firmware into a previously allocated buffer + * @firmware_p: pointer to firmware image + * @name: name of firmware file + * @device: device for which firmware is being loaded and DMA region allocated + * @buf: address of buffer to load firmware into + * @size: size of buffer + * + * This function works pretty much like request_firmware(), but it doesn't + * allocate a buffer to hold the firmware data. Instead, the firmware + * is loaded directly into the buffer pointed to by @buf and the @firmware_p + * data member is pointed at @buf. + * + * This function doesn't cache firmware either. + */ +int +request_firmware_into_buf(const struct firmware **firmware_p, const char *name, + struct device *device, void *buf, size_t size) +{ + int ret; + + __module_get(THIS_MODULE); + ret = _request_firmware(firmware_p, name, device, buf, size, + FW_OPT_UEVENT | FW_OPT_FALLBACK | + FW_OPT_NOCACHE); + module_put(THIS_MODULE); + return ret; +} +EXPORT_SYMBOL(request_firmware_into_buf); + +/** * release_firmware: - release the resource associated with a firmware image * @fw: firmware resource to release **/ @@ -1247,7 +1318,7 @@ static void request_firmware_work_func(struct work_struct *work) fw_work = container_of(work, struct firmware_work, work); - _request_firmware(&fw, fw_work->name, fw_work->device, + _request_firmware(&fw, fw_work->name, fw_work->device, NULL, 0, fw_work->opt_flags); fw_work->cont(fw, fw_work->context); put_device(fw_work->device); /* taken in request_firmware_nowait() */ @@ -1380,7 +1451,7 @@ static int uncache_firmware(const char *fw_name) pr_debug("%s: %s\n", __func__, fw_name); - if (fw_get_builtin_firmware(&fw, fw_name)) + if (fw_get_builtin_firmware(&fw, fw_name, NULL, 0)) return 0; buf = fw_lookup_buf(fw_name); diff --git a/drivers/base/node.c b/drivers/base/node.c index 29cd96661b30..5548f9686016 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -370,7 +370,7 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE #define page_initialized(page) (page->lru.next) -static int __init_refok get_nid_for_pfn(unsigned long pfn) +static int __ref get_nid_for_pfn(unsigned long pfn) { struct page *page; diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 7c04c87738a6..df0c70963d9e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -402,6 +402,22 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, + unsigned long *freq) +{ + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->rate >= *freq) { + opp = temp_opp; + *freq = opp->rate; + break; + } + } + + return opp; +} + /** * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq * @dev: device for which we do this operation @@ -427,7 +443,6 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { struct opp_table *opp_table; - struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -440,15 +455,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, if (IS_ERR(opp_table)) return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->available && temp_opp->rate >= *freq) { - opp = temp_opp; - *freq = opp->rate; - break; - } - } - - return opp; + return _find_freq_ceil(opp_table, freq); } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); @@ -612,7 +619,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) return PTR_ERR(opp_table); } - old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); + old_opp = _find_freq_ceil(opp_table, &old_freq); if (!IS_ERR(old_opp)) { ou_volt = old_opp->u_volt; ou_volt_min = old_opp->u_volt_min; @@ -622,7 +629,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) __func__, old_freq, PTR_ERR(old_opp)); } - opp = dev_pm_opp_find_freq_ceil(dev, &freq); + opp = _find_freq_ceil(opp_table, &freq); if (IS_ERR(opp)) { ret = PTR_ERR(opp); dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a6975795e7f3..efec10b49d59 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -11,7 +11,7 @@ #include <linux/export.h> #include <linux/rtc.h> -#include <asm/rtc.h> +#include <linux/mc146818rtc.h> #include "power.h" @@ -103,7 +103,7 @@ static int set_magic_time(unsigned int user, unsigned int file, unsigned int dev n /= 24; time.tm_min = (n % 20) * 3; n /= 20; - set_rtc_time(&time); + mc146818_set_time(&time); return n ? -1 : 0; } @@ -112,7 +112,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - get_rtc_time(&time); + mc146818_get_time(&time); pr_info("RTC time: %2d:%02d:%02d, date: %02d/%02d/%02d\n", time.tm_hour, time.tm_min, time.tm_sec, time.tm_mon + 1, time.tm_mday, time.tm_year % 100); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 5fb7718f256c..62e4de2aa8d1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -334,10 +334,9 @@ void device_wakeup_arm_wake_irqs(void) struct wakeup_source *ws; rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - if (ws->wakeirq) - dev_pm_arm_wake_irq(ws->wakeirq); - } + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_arm_wake_irq(ws->wakeirq); + rcu_read_unlock(); } @@ -351,10 +350,9 @@ void device_wakeup_disarm_wake_irqs(void) struct wakeup_source *ws; rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - if (ws->wakeirq) - dev_pm_disarm_wake_irq(ws->wakeirq); - } + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_disarm_wake_irq(ws->wakeirq); + rcu_read_unlock(); } @@ -390,9 +388,7 @@ int device_wakeup_disable(struct device *dev) return -EINVAL; ws = device_wakeup_detach(dev); - if (ws) - wakeup_source_unregister(ws); - + wakeup_source_unregister(ws); return 0; } EXPORT_SYMBOL_GPL(device_wakeup_disable); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3022dad24071..3439b28cce8b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -300,20 +300,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, int op, sector_t sector) { void *mem; int err = 0; - if (rw != READ) { + if (op_is_write(op)) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (rw == READ) { + if (!op_is_write(op)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -330,7 +330,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; - int rw; struct bio_vec bvec; sector_t sector; struct bvec_iter iter; @@ -347,14 +346,12 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) goto out; } - rw = bio_data_dir(bio); - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; err = brd_do_bvec(brd, bvec.bv_page, len, - bvec.bv_offset, rw, sector); + bvec.bv_offset, bio_op(bio), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -369,11 +366,11 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, err); + int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op, err); return err; } diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0a1aaf8c24c4..2d3d50ab74bf 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -27,7 +27,6 @@ #include <linux/crc32c.h> #include <linux/drbd.h> #include <linux/drbd_limits.h> -#include <linux/dynamic_debug.h> #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7b54354976a5..4cb8f21ff4ef 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -41,6 +41,7 @@ #include <linux/backing-dev.h> #include <linux/genhd.h> #include <linux/idr.h> +#include <linux/dynamic_debug.h> #include <net/tcp.h> #include <linux/lru_cache.h> #include <linux/prefetch.h> diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c557057fe8ae..b71a9c767009 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; - if (!(mode & (FMODE_READ|FMODE_WRITE))) { - res = -EINVAL; - goto out; - } - res = -ENXIO; if (!floppy_track_buffer) { @@ -3711,13 +3706,15 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } res = -EROFS; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 075377eee0c0..c9f2107f7095 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -510,14 +510,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, return 0; } - -static inline int lo_rw_simple(struct loop_device *lo, - struct request *rq, loff_t pos, bool rw) +static int do_req_filebacked(struct loop_device *lo, struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, rw); + loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; /* * lo_write_simple and lo_read_simple should have been covered @@ -528,37 +524,30 @@ static inline int lo_rw_simple(struct loop_device *lo, * of the req at one time. And direct read IO doesn't need to * run flush_dcache_page(). */ - if (rw == WRITE) - return lo_write_simple(lo, rq, pos); - else - return lo_read_simple(lo, rq, pos); -} - -static int do_req_filebacked(struct loop_device *lo, struct request *rq) -{ - loff_t pos; - int ret; - - pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - - if (op_is_write(req_op(rq))) { - if (req_op(rq) == REQ_OP_FLUSH) - ret = lo_req_flush(lo, rq); - else if (req_op(rq) == REQ_OP_DISCARD) - ret = lo_discard(lo, rq, pos); - else if (lo->transfer) - ret = lo_write_transfer(lo, rq, pos); + switch (req_op(rq)) { + case REQ_OP_FLUSH: + return lo_req_flush(lo, rq); + case REQ_OP_DISCARD: + return lo_discard(lo, rq, pos); + case REQ_OP_WRITE: + if (lo->transfer) + return lo_write_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, WRITE); else - ret = lo_rw_simple(lo, rq, pos, WRITE); - - } else { + return lo_write_simple(lo, rq, pos); + case REQ_OP_READ: if (lo->transfer) - ret = lo_read_transfer(lo, rq, pos); + return lo_read_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, READ); else - ret = lo_rw_simple(lo, rq, pos, READ); + return lo_read_simple(lo, rq, pos); + default: + WARN_ON_ONCE(1); + return -EIO; + break; } - - return ret; } struct switch_request { @@ -1659,11 +1648,15 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (lo->use_dio && (req_op(cmd->rq) != REQ_OP_FLUSH || - req_op(cmd->rq) == REQ_OP_DISCARD)) - cmd->use_aio = true; - else + switch (req_op(cmd->rq)) { + case REQ_OP_FLUSH: + case REQ_OP_DISCARD: cmd->use_aio = false; + break; + default: + cmd->use_aio = lo->use_dio; + break; + } queue_kthread_work(&lo->worker, &cmd->work); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6f55b262b5ce..a9e398019f38 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -451,14 +451,9 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) sk_set_memalloc(nbd->sock->sk); - nbd->task_recv = current; - ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - - nbd->task_recv = NULL; - return ret; } @@ -477,9 +472,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_size_clear(nbd, bdev); device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - - nbd->task_recv = NULL; - return ret; } @@ -788,6 +780,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + /* We have to claim the device under the lock */ + nbd->task_recv = current; mutex_unlock(&nbd->tx_lock); nbd_parse_flags(nbd, bdev); @@ -796,6 +790,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_name(nbd)); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; return PTR_ERR(thread); } @@ -805,6 +800,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, kthread_stop(thread); mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; sock_shutdown(nbd); nbd_clear_que(nbd); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 450662055d97..1a04af6d2421 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1937,7 +1937,7 @@ static struct ceph_osd_request *rbd_osd_req_create( osd_req->r_callback = rbd_osd_req_callback; osd_req->r_priv = obj_request; - osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); + osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", obj_request->object_name)) goto fail; @@ -1991,7 +1991,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) osd_req->r_callback = rbd_osd_req_callback; osd_req->r_priv = obj_request; - osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); + osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", obj_request->object_name)) goto fail; @@ -3995,10 +3995,11 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, /* Initialize the layout used for all rbd requests */ - rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); - rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + rbd_dev->layout.stripe_unit = 1 << RBD_MAX_OBJ_ORDER; + rbd_dev->layout.stripe_count = 1; + rbd_dev->layout.object_size = 1 << RBD_MAX_OBJ_ORDER; + rbd_dev->layout.pool_id = spec->pool_id; + RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); /* * If this is a mapping rbd_dev (as opposed to a parent one), @@ -5187,7 +5188,7 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); - rbd_dev->header_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); + rbd_dev->header_oloc.pool = rbd_dev->layout.pool_id; if (rbd_dev->image_format == 1) ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", spec->image_name, RBD_SUFFIX); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7454cf188c8e..ca29649c4b08 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -843,15 +843,15 @@ static void zram_bio_discard(struct zram *zram, u32 index, } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, int rw) + int offset, int op) { unsigned long start_time = jiffies; int ret; - generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, + generic_start_io_acct(op, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (rw == READ) { + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset); } else { @@ -859,10 +859,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset); } - generic_end_io_acct(rw, &zram->disk->part0, start_time); + generic_end_io_acct(op, &zram->disk->part0, start_time); if (unlikely(ret)) { - if (rw == READ) + if (!op_is_write(op)) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -873,7 +873,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, static void __zram_make_request(struct zram *zram, struct bio *bio) { - int offset, rw; + int offset; u32 index; struct bio_vec bvec; struct bvec_iter iter; @@ -888,7 +888,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) return; } - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { int max_transfer_size = PAGE_SIZE - offset; @@ -903,15 +902,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = max_transfer_size; bv.bv_offset = bvec.bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, offset, + bio_op(bio)) < 0) goto out; bv.bv_len = bvec.bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, + bio_op(bio)) < 0) goto out; } else - if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bvec, index, offset, + bio_op(bio)) < 0) goto out; update_position(&index, &offset, &bvec); @@ -968,7 +970,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { int offset, err = -EIO; u32 index; @@ -992,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - err = zram_bvec_rw(zram, &bv, index, offset, rw); + err = zram_bvec_rw(zram, &bv, index, offset, op); put_zram: zram_meta_put(zram); out: @@ -1005,7 +1007,7 @@ out: * (e.g., SetPageError, set_page_dirty and extra works). */ if (err == 0) - page_endio(page, rw, 0); + page_endio(page, op, 0); return err; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index fdb8f3e10b6f..dcc09739a54e 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -293,7 +293,7 @@ if RTC_LIB=n config RTC tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)" - depends on ALPHA || (MIPS && MACH_LOONGSON64) || MN10300 + depends on ALPHA || (MIPS && MACH_LOONGSON64) ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you @@ -339,32 +339,6 @@ config JS_RTC To compile this driver as a module, choose M here: the module will be called js-rtc. -config GEN_RTC - tristate "Generic /dev/rtc emulation" - depends on RTC!=y - depends on ALPHA || M68K || MN10300 || PARISC || PPC || X86 - ---help--- - If you say Y here and create a character special file /dev/rtc with - major number 10 and minor number 135 using mknod ("man mknod"), you - will get access to the real time clock (or hardware clock) built - into your computer. - - It reports status information via the file /proc/driver/rtc and its - behaviour is set by various ioctls on /dev/rtc. If you enable the - "extended RTC operation" below it will also provide an emulation - for RTC_UIE which is required by some programs and may improve - precision in some cases. - - To compile this driver as a module, choose M here: the - module will be called genrtc. - -config GEN_RTC_X - bool "Extended RTC operation" - depends on GEN_RTC - help - Provides an emulation for RTC_UIE which is required by some programs - and may improve precision of the generic RTC support in some cases. - config EFI_RTC bool "EFI Real Time Clock Services" depends on IA64 diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 55d16bf3ccc5..6e6c244a66a0 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o obj-$(CONFIG_RTC) += rtc.o obj-$(CONFIG_HPET) += hpet.o -obj-$(CONFIG_GEN_RTC) += genrtc.o obj-$(CONFIG_EFI_RTC) += efirtc.o obj-$(CONFIG_DS1302) += ds1302.o obj-$(CONFIG_XILINX_HWICAP) += xilinx_hwicap/ diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c deleted file mode 100644 index 4f943759d376..000000000000 --- a/drivers/char/genrtc.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Real Time Clock interface for - * - q40 and other m68k machines, - * - HP PARISC machines - * - PowerPC machines - * emulate some RTC irq capabilities in software - * - * Copyright (C) 1999 Richard Zidlicky - * - * based on Paul Gortmaker's rtc.c device and - * Sam Creasey Generic rtc driver - * - * This driver allows use of the real time clock (built into - * nearly all computers) from user space. It exports the /dev/rtc - * interface supporting various ioctl() and also the /proc/driver/rtc - * pseudo-file for status information. - * - * The ioctls can be used to set the interrupt behaviour where - * supported. - * - * The /dev/rtc interface will block on reads until an interrupt - * has been received. If a RTC interrupt has already happened, - * it will output an unsigned long and then block. The output value - * contains the interrupt status in the low byte and the number of - * interrupts since the last read in the remaining high bytes. The - * /dev/rtc interface can also be used with the select(2) call. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - - * 1.01 fix for 2.3.X rz@linux-m68k.org - * 1.02 merged with code from genrtc.c rz@linux-m68k.org - * 1.03 make it more portable zippel@linux-m68k.org - * 1.04 removed useless timer code rz@linux-m68k.org - * 1.05 portable RTC_UIE emulation rz@linux-m68k.org - * 1.06 set_rtc_time can return an error trini@kernel.crashing.org - * 1.07 ported to HP PARISC (hppa) Helge Deller <deller@gmx.de> - */ - -#define RTC_VERSION "1.07" - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/fcntl.h> - -#include <linux/rtc.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/mutex.h> -#include <linux/workqueue.h> - -#include <asm/uaccess.h> -#include <asm/rtc.h> - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. - */ - -static DEFINE_MUTEX(gen_rtc_mutex); -static DECLARE_WAIT_QUEUE_HEAD(gen_rtc_wait); - -/* - * Bits in gen_rtc_status. - */ - -#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ - -static unsigned char gen_rtc_status; /* bitmapped status byte. */ -static unsigned long gen_rtc_irq_data; /* our output to the world */ - -/* months start at 0 now */ -static unsigned char days_in_mo[] = -{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -static int irq_active; - -#ifdef CONFIG_GEN_RTC_X -static struct work_struct genrtc_task; -static struct timer_list timer_task; - -static unsigned int oldsecs; -static int lostint; -static unsigned long tt_exp; - -static void gen_rtc_timer(unsigned long data); - -static volatile int stask_active; /* schedule_work */ -static volatile int ttask_active; /* timer_task */ -static int stop_rtc_timers; /* don't requeue tasks */ -static DEFINE_SPINLOCK(gen_rtc_lock); - -static void gen_rtc_interrupt(unsigned long arg); - -/* - * Routine to poll RTC seconds field for change as often as possible, - * after first RTC_UIE use timer to reduce polling - */ -static void genrtc_troutine(struct work_struct *work) -{ - unsigned int tmp = get_rtc_ss(); - - if (stop_rtc_timers) { - stask_active = 0; - return; - } - - if (oldsecs != tmp){ - oldsecs = tmp; - - timer_task.function = gen_rtc_timer; - timer_task.expires = jiffies + HZ - (HZ/10); - tt_exp=timer_task.expires; - ttask_active=1; - stask_active=0; - add_timer(&timer_task); - - gen_rtc_interrupt(0); - } else if (schedule_work(&genrtc_task) == 0) - stask_active = 0; -} - -static void gen_rtc_timer(unsigned long data) -{ - lostint = get_rtc_ss() - oldsecs ; - if (lostint<0) - lostint = 60 - lostint; - if (time_after(jiffies, tt_exp)) - printk(KERN_INFO "genrtc: timer task delayed by %ld jiffies\n", - jiffies-tt_exp); - ttask_active=0; - stask_active=1; - if ((schedule_work(&genrtc_task) == 0)) - stask_active = 0; -} - -/* - * call gen_rtc_interrupt function to signal an RTC_UIE, - * arg is unused. - * Could be invoked either from a real interrupt handler or - * from some routine that periodically (eg 100HZ) monitors - * whether RTC_SECS changed - */ -static void gen_rtc_interrupt(unsigned long arg) -{ - /* We store the status in the low byte and the number of - * interrupts received since the last read in the remainder - * of rtc_irq_data. */ - - gen_rtc_irq_data += 0x100; - gen_rtc_irq_data &= ~0xff; - gen_rtc_irq_data |= RTC_UIE; - - if (lostint){ - printk("genrtc: system delaying clock ticks?\n"); - /* increment count so that userspace knows something is wrong */ - gen_rtc_irq_data += ((lostint-1)<<8); - lostint = 0; - } - - wake_up_interruptible(&gen_rtc_wait); -} - -/* - * Now all the various file operations that we export. - */ -static ssize_t gen_rtc_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long data; - ssize_t retval; - - if (count != sizeof (unsigned int) && count != sizeof (unsigned long)) - return -EINVAL; - - if (file->f_flags & O_NONBLOCK && !gen_rtc_irq_data) - return -EAGAIN; - - retval = wait_event_interruptible(gen_rtc_wait, - (data = xchg(&gen_rtc_irq_data, 0))); - if (retval) - goto out; - - /* first test allows optimizer to nuke this case for 32-bit machines */ - if (sizeof (int) != sizeof (long) && count == sizeof (unsigned int)) { - unsigned int uidata = data; - retval = put_user(uidata, (unsigned int __user *)buf) ?: - sizeof(unsigned int); - } - else { - retval = put_user(data, (unsigned long __user *)buf) ?: - sizeof(unsigned long); - } -out: - return retval; -} - -static unsigned int gen_rtc_poll(struct file *file, - struct poll_table_struct *wait) -{ - poll_wait(file, &gen_rtc_wait, wait); - if (gen_rtc_irq_data != 0) - return POLLIN | POLLRDNORM; - return 0; -} - -#endif - -/* - * Used to disable/enable interrupts, only RTC_UIE supported - * We also clear out any old irq data after an ioctl() that - * meddles with the interrupt enable/disable bits. - */ - -static inline void gen_clear_rtc_irq_bit(unsigned char bit) -{ -#ifdef CONFIG_GEN_RTC_X - stop_rtc_timers = 1; - if (ttask_active){ - del_timer_sync(&timer_task); - ttask_active = 0; - } - while (stask_active) - schedule(); - - spin_lock(&gen_rtc_lock); - irq_active = 0; - spin_unlock(&gen_rtc_lock); -#endif -} - -static inline int gen_set_rtc_irq_bit(unsigned char bit) -{ -#ifdef CONFIG_GEN_RTC_X - spin_lock(&gen_rtc_lock); - if ( !irq_active ) { - irq_active = 1; - stop_rtc_timers = 0; - lostint = 0; - INIT_WORK(&genrtc_task, genrtc_troutine); - oldsecs = get_rtc_ss(); - init_timer(&timer_task); - - stask_active = 1; - if (schedule_work(&genrtc_task) == 0){ - stask_active = 0; - } - } - spin_unlock(&gen_rtc_lock); - gen_rtc_irq_data = 0; - return 0; -#else - return -EINVAL; -#endif -} - -static int gen_rtc_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct rtc_time wtime; - struct rtc_pll_info pll; - void __user *argp = (void __user *)arg; - - switch (cmd) { - - case RTC_PLL_GET: - if (get_rtc_pll(&pll)) - return -EINVAL; - else - return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0; - - case RTC_PLL_SET: - if (!capable(CAP_SYS_TIME)) - return -EACCES; - if (copy_from_user(&pll, argp, sizeof(pll))) - return -EFAULT; - return set_rtc_pll(&pll); - - case RTC_UIE_OFF: /* disable ints from RTC updates. */ - gen_clear_rtc_irq_bit(RTC_UIE); - return 0; - - case RTC_UIE_ON: /* enable ints for RTC updates. */ - return gen_set_rtc_irq_bit(RTC_UIE); - - case RTC_RD_TIME: /* Read the time/date from RTC */ - /* this doesn't get week-day, who cares */ - memset(&wtime, 0, sizeof(wtime)); - get_rtc_time(&wtime); - - return copy_to_user(argp, &wtime, sizeof(wtime)) ? -EFAULT : 0; - - case RTC_SET_TIME: /* Set the RTC */ - { - int year; - unsigned char leap_yr; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - if (copy_from_user(&wtime, argp, sizeof(wtime))) - return -EFAULT; - - year = wtime.tm_year + 1900; - leap_yr = ((!(year % 4) && (year % 100)) || - !(year % 400)); - - if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1)) - return -EINVAL; - - if (wtime.tm_mday < 0 || wtime.tm_mday > - (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr))) - return -EINVAL; - - if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 || - wtime.tm_min < 0 || wtime.tm_min >= 60 || - wtime.tm_sec < 0 || wtime.tm_sec >= 60) - return -EINVAL; - - return set_rtc_time(&wtime); - } - } - - return -EINVAL; -} - -static long gen_rtc_unlocked_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret; - - mutex_lock(&gen_rtc_mutex); - ret = gen_rtc_ioctl(file, cmd, arg); - mutex_unlock(&gen_rtc_mutex); - - return ret; -} - -/* - * We enforce only one user at a time here with the open/close. - * Also clear the previous interrupt data on an open, and clean - * up things on a close. - */ - -static int gen_rtc_open(struct inode *inode, struct file *file) -{ - mutex_lock(&gen_rtc_mutex); - if (gen_rtc_status & RTC_IS_OPEN) { - mutex_unlock(&gen_rtc_mutex); - return -EBUSY; - } - - gen_rtc_status |= RTC_IS_OPEN; - gen_rtc_irq_data = 0; - irq_active = 0; - mutex_unlock(&gen_rtc_mutex); - - return 0; -} - -static int gen_rtc_release(struct inode *inode, struct file *file) -{ - /* - * Turn off all interrupts once the device is no longer - * in use and clear the data. - */ - - gen_clear_rtc_irq_bit(RTC_PIE|RTC_AIE|RTC_UIE); - - gen_rtc_status &= ~RTC_IS_OPEN; - return 0; -} - - -#ifdef CONFIG_PROC_FS - -/* - * Info exported via "/proc/driver/rtc". - */ - -static int gen_rtc_proc_show(struct seq_file *m, void *v) -{ - struct rtc_time tm; - unsigned int flags; - struct rtc_pll_info pll; - - flags = get_rtc_time(&tm); - - seq_printf(m, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04u\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 1900); - - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; - - seq_puts(m, "alarm\t\t: "); - if (tm.tm_hour <= 24) - seq_printf(m, "%02d:", tm.tm_hour); - else - seq_puts(m, "**:"); - - if (tm.tm_min <= 59) - seq_printf(m, "%02d:", tm.tm_min); - else - seq_puts(m, "**:"); - - if (tm.tm_sec <= 59) - seq_printf(m, "%02d\n", tm.tm_sec); - else - seq_puts(m, "**\n"); - - seq_printf(m, - "DST_enable\t: %s\n" - "BCD\t\t: %s\n" - "24hr\t\t: %s\n" - "square_wave\t: %s\n" - "alarm_IRQ\t: %s\n" - "update_IRQ\t: %s\n" - "periodic_IRQ\t: %s\n" - "periodic_freq\t: %ld\n" - "batt_status\t: %s\n", - (flags & RTC_DST_EN) ? "yes" : "no", - (flags & RTC_DM_BINARY) ? "no" : "yes", - (flags & RTC_24H) ? "yes" : "no", - (flags & RTC_SQWE) ? "yes" : "no", - (flags & RTC_AIE) ? "yes" : "no", - irq_active ? "yes" : "no", - (flags & RTC_PIE) ? "yes" : "no", - 0L /* freq */, - (flags & RTC_BATT_BAD) ? "bad" : "okay"); - if (!get_rtc_pll(&pll)) - seq_printf(m, - "PLL adjustment\t: %d\n" - "PLL max +ve adjustment\t: %d\n" - "PLL max -ve adjustment\t: %d\n" - "PLL +ve adjustment factor\t: %d\n" - "PLL -ve adjustment factor\t: %d\n" - "PLL frequency\t: %ld\n", - pll.pll_value, - pll.pll_max, - pll.pll_min, - pll.pll_posmult, - pll.pll_negmult, - pll.pll_clock); - return 0; -} - -static int gen_rtc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, gen_rtc_proc_show, NULL); -} - -static const struct file_operations gen_rtc_proc_fops = { - .open = gen_rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init gen_rtc_proc_init(void) -{ - struct proc_dir_entry *r; - - r = proc_create("driver/rtc", 0, NULL, &gen_rtc_proc_fops); - if (!r) - return -ENOMEM; - return 0; -} -#else -static inline int gen_rtc_proc_init(void) { return 0; } -#endif /* CONFIG_PROC_FS */ - - -/* - * The various file operations we support. - */ - -static const struct file_operations gen_rtc_fops = { - .owner = THIS_MODULE, -#ifdef CONFIG_GEN_RTC_X - .read = gen_rtc_read, - .poll = gen_rtc_poll, -#endif - .unlocked_ioctl = gen_rtc_unlocked_ioctl, - .open = gen_rtc_open, - .release = gen_rtc_release, - .llseek = noop_llseek, -}; - -static struct miscdevice rtc_gen_dev = -{ - .minor = RTC_MINOR, - .name = "rtc", - .fops = &gen_rtc_fops, -}; - -static int __init rtc_generic_init(void) -{ - int retval; - - printk(KERN_INFO "Generic RTC Driver v%s\n", RTC_VERSION); - - retval = misc_register(&rtc_gen_dev); - if (retval < 0) - return retval; - - retval = gen_rtc_proc_init(); - if (retval) { - misc_deregister(&rtc_gen_dev); - return retval; - } - - return 0; -} - -static void __exit rtc_generic_exit(void) -{ - remove_proc_entry ("driver/rtc", NULL); - misc_deregister(&rtc_gen_dev); -} - - -module_init(rtc_generic_init); -module_exit(rtc_generic_exit); - -MODULE_AUTHOR("Richard Zidlicky"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(RTC_MINOR); diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 89cc700fbc37..97ae60fa1584 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -250,7 +250,7 @@ struct clk_lookup_alloc { char con_id[MAX_CON_ID]; }; -static struct clk_lookup * __init_refok +static struct clk_lookup * __ref vclkdev_alloc(struct clk_hw *hw, const char *con_id, const char *dev_fmt, va_list ap) { @@ -287,7 +287,7 @@ vclkdev_create(struct clk_hw *hw, const char *con_id, const char *dev_fmt, return cl; } -struct clk_lookup * __init_refok +struct clk_lookup * __ref clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) { struct clk_lookup *cl; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index c822d72629d5..74919aa81dcb 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -32,7 +32,6 @@ config CPU_FREQ_BOOST_SW config CPU_FREQ_STAT bool "CPU frequency transition statistics" - default y help Export CPU frequency statistics information through sysfs. diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ec033b4f2d9..be9eade147f2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1374,6 +1374,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), + ICPU(INTEL_FAM6_BROADWELL_X, core_params), + ICPU(INTEL_FAM6_SKYLAKE_X, core_params), {} }; diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c index b6eb875d4af3..62aa3cf09b4d 100644 --- a/drivers/firmware/broadcom/bcm47xx_sprom.c +++ b/drivers/firmware/broadcom/bcm47xx_sprom.c @@ -669,7 +669,7 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out) case BCMA_HOSTTYPE_PCI: memset(out, 0, sizeof(struct ssb_sprom)); /* On BCM47XX all PCI buses share the same domain */ - if (config_enabled(CONFIG_BCM47XX)) + if (IS_ENABLED(CONFIG_BCM47XX)) snprintf(buf, sizeof(buf), "pci/%u/%u/", bus->host_pci->bus->number + 1, PCI_SLOT(bus->host_pci->devfn)); diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index c9b9fdf6cfbb..d61410299ec0 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -21,6 +21,7 @@ config FPGA_MGR_SOCFPGA config FPGA_MGR_ZYNQ_FPGA tristate "Xilinx Zynq FPGA" + depends on HAS_DMA help FPGA manager driver support for Xilinx Zynq FPGAs. diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index fb49443bfd32..4cfb39d543b4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -52,7 +52,7 @@ static int exynos_drm_fb_mmap(struct fb_info *info, ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to mmap.\n"); return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 8564c3da0d22..4bf00f57ffe8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/dma-mapping.h> -#include <linux/dma-attrs.h> #include <linux/of.h> #include <drm/drmP.h> @@ -235,7 +234,7 @@ struct g2d_data { struct mutex cmdlist_mutex; dma_addr_t cmdlist_pool; void *cmdlist_pool_virt; - struct dma_attrs cmdlist_dma_attrs; + unsigned long cmdlist_dma_attrs; /* runqueue*/ struct g2d_runqueue_node *runqueue_node; @@ -256,13 +255,12 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) int ret; struct g2d_buf_info *buf_info; - init_dma_attrs(&g2d->cmdlist_dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_dma_attrs = DMA_ATTR_WRITE_COMBINE; g2d->cmdlist_pool_virt = dma_alloc_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, &g2d->cmdlist_pool, GFP_KERNEL, - &g2d->cmdlist_dma_attrs); + g2d->cmdlist_dma_attrs); if (!g2d->cmdlist_pool_virt) { dev_err(dev, "failed to allocate dma memory\n"); return -ENOMEM; @@ -295,7 +293,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) err: dma_free_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt, - g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_pool, g2d->cmdlist_dma_attrs); return ret; } @@ -309,7 +307,7 @@ static void g2d_fini_cmdlist(struct g2d_data *g2d) dma_free_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt, - g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_pool, g2d->cmdlist_dma_attrs); } } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index cdf9f1af4347..f2ae72ba7d5a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -24,7 +24,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) { struct drm_device *dev = exynos_gem->base.dev; - enum dma_attr attr; + unsigned long attr; unsigned int nr_pages; struct sg_table sgt; int ret = -ENOMEM; @@ -34,7 +34,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) return 0; } - init_dma_attrs(&exynos_gem->dma_attrs); + exynos_gem->dma_attrs = 0; /* * if EXYNOS_BO_CONTIG, fully physically contiguous memory @@ -42,7 +42,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) * as possible. */ if (!(exynos_gem->flags & EXYNOS_BO_NONCONTIG)) - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &exynos_gem->dma_attrs); + exynos_gem->dma_attrs |= DMA_ATTR_FORCE_CONTIGUOUS; /* * if EXYNOS_BO_WC or EXYNOS_BO_NONCACHABLE, writecombine mapping @@ -54,8 +54,8 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) else attr = DMA_ATTR_NON_CONSISTENT; - dma_set_attr(attr, &exynos_gem->dma_attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &exynos_gem->dma_attrs); + exynos_gem->dma_attrs |= attr; + exynos_gem->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; nr_pages = exynos_gem->size >> PAGE_SHIFT; @@ -67,7 +67,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) exynos_gem->cookie = dma_alloc_attrs(to_dma_dev(dev), exynos_gem->size, &exynos_gem->dma_addr, GFP_KERNEL, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (!exynos_gem->cookie) { DRM_ERROR("failed to allocate buffer.\n"); goto err_free; @@ -75,7 +75,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) ret = dma_get_sgtable_attrs(to_dma_dev(dev), &sgt, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to get sgtable.\n"); goto err_dma_free; @@ -99,7 +99,7 @@ err_sgt_free: sg_free_table(&sgt); err_dma_free: dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie, - exynos_gem->dma_addr, &exynos_gem->dma_attrs); + exynos_gem->dma_addr, exynos_gem->dma_attrs); err_free: drm_free_large(exynos_gem->pages); @@ -120,7 +120,7 @@ static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem) dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie, (dma_addr_t)exynos_gem->dma_addr, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); drm_free_large(exynos_gem->pages); } @@ -346,7 +346,7 @@ static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem, ret = dma_mmap_attrs(to_dma_dev(drm_dev), vma, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to mmap.\n"); return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 78100742281d..df7c543d6558 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -50,7 +50,7 @@ struct exynos_drm_gem { void *cookie; void __iomem *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; struct page **pages; struct sg_table *sgt; }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 915a3d0acff3..21f939074abc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3651,8 +3651,8 @@ void i915_debugfs_unregister(struct drm_i915_private *dev_priv); int i915_debugfs_connector_add(struct drm_connector *connector); void intel_display_crc_init(struct drm_device *dev); #else -static inline int i915_debugfs_register(struct drm_i915_private *) {return 0;} -static inline void i915_debugfs_unregister(struct drm_i915_private *) {} +static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} +static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {} static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; } static inline void intel_display_crc_init(struct drm_device *dev) {} diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index fa2ec0cd00e8..7abc550ebc00 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -54,15 +54,14 @@ struct mtk_drm_gem_obj *mtk_drm_gem_create(struct drm_device *dev, obj = &mtk_gem->base; - init_dma_attrs(&mtk_gem->dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &mtk_gem->dma_attrs); + mtk_gem->dma_attrs = DMA_ATTR_WRITE_COMBINE; if (!alloc_kmap) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &mtk_gem->dma_attrs); + mtk_gem->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; mtk_gem->cookie = dma_alloc_attrs(priv->dma_dev, obj->size, &mtk_gem->dma_addr, GFP_KERNEL, - &mtk_gem->dma_attrs); + mtk_gem->dma_attrs); if (!mtk_gem->cookie) { DRM_ERROR("failed to allocate %zx byte dma buffer", obj->size); ret = -ENOMEM; @@ -93,7 +92,7 @@ void mtk_drm_gem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, mtk_gem->sg); else dma_free_attrs(priv->dma_dev, obj->size, mtk_gem->cookie, - mtk_gem->dma_addr, &mtk_gem->dma_attrs); + mtk_gem->dma_addr, mtk_gem->dma_attrs); /* release file pointer to gem object. */ drm_gem_object_release(obj); @@ -173,7 +172,7 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, vma->vm_pgoff = 0; ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie, - mtk_gem->dma_addr, obj->size, &mtk_gem->dma_attrs); + mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs); if (ret) drm_gem_vm_close(vma); @@ -224,7 +223,7 @@ struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj) ret = dma_get_sgtable_attrs(priv->dma_dev, sgt, mtk_gem->cookie, mtk_gem->dma_addr, obj->size, - &mtk_gem->dma_attrs); + mtk_gem->dma_attrs); if (ret) { DRM_ERROR("failed to allocate sgt, %d\n", ret); kfree(sgt); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 3a2a5624a1cb..2752718fa5b2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -35,7 +35,7 @@ struct mtk_drm_gem_obj { void *cookie; void *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; struct sg_table *sg; }; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 26f859ec24b3..8a0237008f74 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -238,11 +238,10 @@ static int msm_drm_uninit(struct device *dev) } if (priv->vram.paddr) { - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + unsigned long attrs = DMA_ATTR_NO_KERNEL_MAPPING; drm_mm_takedown(&priv->vram.mm); dma_free_attrs(dev, priv->vram.size, NULL, - priv->vram.paddr, &attrs); + priv->vram.paddr, attrs); } component_unbind_all(dev, ddev); @@ -310,21 +309,21 @@ static int msm_init_vram(struct drm_device *dev) } if (size) { - DEFINE_DMA_ATTRS(attrs); + unsigned long attrs = 0; void *p; priv->vram.size = size; drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + attrs |= DMA_ATTR_NO_KERNEL_MAPPING; + attrs |= DMA_ATTR_WRITE_COMBINE; /* note that for no-kernel-mapping, the vaddr returned * is bogus, but non-null if allocation succeeded: */ p = dma_alloc_attrs(dev->dev, size, - &priv->vram.paddr, GFP_KERNEL, &attrs); + &priv->vram.paddr, GFP_KERNEL, attrs); if (!p) { dev_err(dev->dev, "failed to allocate VRAM\n"); priv->vram.paddr = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 6b8f2a19b2d9..a6a7fa0d7679 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -109,7 +109,7 @@ struct gk20a_instmem { u16 iommu_bit; /* Only used by DMA API */ - struct dma_attrs attrs; + unsigned long attrs; }; #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base) @@ -293,7 +293,7 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) goto out; dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, - node->handle, &imem->attrs); + node->handle, imem->attrs); out: return node; @@ -386,7 +386,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, - &imem->attrs); + imem->attrs); if (!node->base.vaddr) { nvkm_error(subdev, "cannot allocate DMA memory\n"); return -ENOMEM; @@ -597,10 +597,9 @@ gk20a_instmem_new(struct nvkm_device *device, int index, nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { - init_dma_attrs(&imem->attrs); - dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs); - dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs); + imem->attrs = DMA_ATTR_NON_CONSISTENT | + DMA_ATTR_WEAK_ORDERING | + DMA_ATTR_WRITE_COMBINE; nvkm_info(&imem->base.subdev, "using DMA API\n"); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 059e902f872d..b70f9423379c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -17,8 +17,6 @@ #include <drm/drm_gem.h> #include <drm/drm_vma_manager.h> -#include <linux/dma-attrs.h> - #include "rockchip_drm_drv.h" #include "rockchip_drm_gem.h" @@ -28,15 +26,14 @@ static int rockchip_gem_alloc_buf(struct rockchip_gem_object *rk_obj, struct drm_gem_object *obj = &rk_obj->base; struct drm_device *drm = obj->dev; - init_dma_attrs(&rk_obj->dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &rk_obj->dma_attrs); + rk_obj->dma_attrs = DMA_ATTR_WRITE_COMBINE; if (!alloc_kmap) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs); + rk_obj->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; rk_obj->kvaddr = dma_alloc_attrs(drm->dev, obj->size, &rk_obj->dma_addr, GFP_KERNEL, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); if (!rk_obj->kvaddr) { DRM_ERROR("failed to allocate %zu byte dma buffer", obj->size); return -ENOMEM; @@ -51,7 +48,7 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj) struct drm_device *drm = obj->dev; dma_free_attrs(drm->dev, obj->size, rk_obj->kvaddr, rk_obj->dma_addr, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); } static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, @@ -70,7 +67,7 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, vma->vm_pgoff = 0; ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, - obj->size, &rk_obj->dma_attrs); + obj->size, rk_obj->dma_attrs); if (ret) drm_gem_vm_close(vma); @@ -262,7 +259,7 @@ struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj) ret = dma_get_sgtable_attrs(drm->dev, sgt, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); if (ret) { DRM_ERROR("failed to allocate sgt, %d\n", ret); kfree(sgt); @@ -276,7 +273,7 @@ void *rockchip_gem_prime_vmap(struct drm_gem_object *obj) { struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); - if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs)) + if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) return NULL; return rk_obj->kvaddr; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h index ad22618473a4..18b3488db4ec 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h @@ -23,7 +23,7 @@ struct rockchip_gem_object { void *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; }; struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj); diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 2137adfbd8c3..e9b7dc037ff8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ad1b1adcf6f0..e6dfa1bd3def 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,6 +68,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 @@ -162,6 +163,14 @@ struct rdma_bind_list { unsigned short port; }; +struct class_port_info_context { + struct ib_class_port_info *class_port_info; + struct ib_device *device; + struct completion done; + struct ib_sa_query *sa_query; + u8 port_num; +}; + static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { @@ -306,6 +315,7 @@ struct cma_multicast { struct sockaddr_storage addr; struct kref mcref; bool igmp_joined; + u8 join_state; }; struct cma_work { @@ -3752,10 +3762,63 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, } } +static void cma_query_sa_classport_info_cb(int status, + struct ib_class_port_info *rec, + void *context) +{ + struct class_port_info_context *cb_ctx = context; + + WARN_ON(!context); + + if (status || !rec) { + pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", + cb_ctx->device->name, cb_ctx->port_num, status); + goto out; + } + + memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); + +out: + complete(&cb_ctx->done); +} + +static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, + struct ib_class_port_info *class_port_info) +{ + struct class_port_info_context *cb_ctx; + int ret; + + cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); + if (!cb_ctx) + return -ENOMEM; + + cb_ctx->device = device; + cb_ctx->class_port_info = class_port_info; + cb_ctx->port_num = port_num; + init_completion(&cb_ctx->done); + + ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, + CMA_QUERY_CLASSPORT_INFO_TIMEOUT, + GFP_KERNEL, cma_query_sa_classport_info_cb, + cb_ctx, &cb_ctx->sa_query); + if (ret < 0) { + pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", + device->name, port_num, ret); + goto out; + } + + wait_for_completion(&cb_ctx->done); + +out: + kfree(cb_ctx); + return ret; +} + static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; + struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; @@ -3774,7 +3837,24 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - rec.join_state = 1; + rec.join_state = mc->join_state; + + if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { + ret = cma_query_sa_classport_info(id_priv->id.device, + id_priv->id.port_num, + &class_port_info); + + if (ret) + return ret; + + if (!(ib_get_cpi_capmask2(&class_port_info) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" + "RDMA CM: SM doesn't support Send Only Full Member option\n", + id_priv->id.device->name, id_priv->id.port_num); + return -EOPNOTSUPP; + } + } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | @@ -3843,6 +3923,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; enum ib_gid_type gid_type; + bool send_only; + + send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; @@ -3878,10 +3961,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, - true); - if (!err) - mc->igmp_joined = true; + if (!send_only) { + err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, + true); + if (!err) + mc->igmp_joined = true; + } } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -3911,7 +3996,7 @@ out1: } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - void *context) + u8 join_state, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; @@ -3930,6 +4015,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, mc->context = context; mc->id_priv = id_priv; mc->igmp_joined = false; + mc->join_state = join_state; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5c155fa91eec..760ef603a468 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device) return 0; } +void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +{ + if (dev->get_dev_fw_str) + dev->get_dev_fw_str(dev, str, str_len); + else + str[0] = '\0'; +} +EXPORT_SYMBOL(ib_get_device_fw_str); + /** * ib_register_device - Register an IB device with IB core * @device:Device to register diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index f0572049d291..357624f8b9d3 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv) /* * Release a reference on cm_id. If the last reference is being - * released, enable the waiting thread (in iw_destroy_cm_id) to - * get woken up, and return 1 if a thread is already waiting. + * released, free the cm_id and return 1. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { BUG_ON(atomic_read(&cm_id_priv->refcount)==0); if (atomic_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); - complete(&cm_id_priv->destroy_comp); + free_cm_id(cm_id_priv); return 1; } @@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id) static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; - int cb_destroy; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - /* - * Test bit before deref in case the cm_id gets freed on another - * thread. - */ - cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv) && cb_destroy) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); @@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + /* + * Since we're deleting the cm_id, drop any events that + * might arrive before the last dereference. + */ + set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: @@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id) struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); - destroy_cm_id(cm_id); - - wait_for_completion(&cm_id_priv->destroy_comp); - - free_cm_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(cm_id); - if (atomic_read(&cm_id_priv->refcount)==0) - free_cm_id(cm_id_priv); + iw_destroy_cm_id(cm_id); } out: @@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work) unsigned long flags; int empty; int ret = 0; - int destroy_id; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); @@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work) put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = process_event(cm_id_priv, &levent); - if (ret) { - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(&cm_id_priv->id); - } - BUG_ON(atomic_read(&cm_id_priv->refcount)==0); - destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv)) { - if (destroy_id) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { + ret = process_event(cm_id_priv, &levent); + if (ret) + destroy_cm_id(&cm_id_priv->id); + } else + pr_debug("dropping event %d\n", levent.event); + if (iwcm_deref_id(cm_id_priv)) return; - } if (empty) return; spin_lock_irqsave(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h index 3f6cc82564c8..82c2cd1b0a80 100644 --- a/drivers/infiniband/core/iwcm.h +++ b/drivers/infiniband/core/iwcm.h @@ -56,7 +56,7 @@ struct iwcm_id_private { struct list_head work_free_list; }; -#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_DROP_EVENTS 1 #define IWCM_F_CONNECT_WAIT 2 #endif /* IWCM_H */ diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index b65e06c560d7..ade71e7f0131 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -37,6 +37,7 @@ #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) #define IWPM_REMINFO_HASH_SIZE 64 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) +#define IWPM_MSG_SIZE 512 static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -452,7 +453,7 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, { struct sk_buff *skb = NULL; - skb = dev_alloc_skb(NLMSG_GOODSIZE); + skb = dev_alloc_skb(IWPM_MSG_SIZE); if (!skb) { pr_err("%s Unable to allocate skb\n", __func__); goto create_nlmsg_exit; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index a83ec28a147b..3a3c5d73bbfc 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -93,18 +93,6 @@ enum { struct mcast_member; -/* -* There are 4 types of join states: -* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. -*/ -enum { - FULLMEMBER_JOIN, - NONMEMBER_JOIN, - SENDONLY_NONMEBER_JOIN, - SENDONLY_FULLMEMBER_JOIN, - NUM_JOIN_MEMBERSHIP_TYPES, -}; - struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 9b8c20c8209b..10469b0088b5 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -229,7 +229,10 @@ static void ibnl_rcv(struct sk_buff *skb) int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid) { - return nlmsg_unicast(nls, skb, pid); + int err; + + err = netlink_unicast(nls, skb, pid, 0); + return (err < 0) ? err : 0; } EXPORT_SYMBOL(ibnl_unicast); @@ -252,6 +255,7 @@ int __init ibnl_init(void) return -ENOMEM; } + nls->sk_sndtimeo = 10 * HZ; return 0; } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 1eb9b1294a63..dbfd854c32c9 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -58,19 +58,13 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_max_sge(struct ib_device *dev, - enum dma_data_direction dir) -{ - return dir == DMA_TO_DEVICE ? - dev->attrs.max_sge : dev->attrs.max_sge_rd; -} - static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) { /* arbitrary limit to avoid allocating gigantic resources */ return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); } +/* Caller must have zero-initialized *reg. */ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, u32 sg_cnt, u32 offset) @@ -114,6 +108,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { + struct rdma_rw_reg_ctx *prev = NULL; u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); int i, j, ret = 0, count = 0; @@ -125,7 +120,6 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, } for (i = 0; i < ctx->nr_ops; i++) { - struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL; struct rdma_rw_reg_ctx *reg = &ctx->reg[i]; u32 nents = min(sg_cnt, pages_per_mr); @@ -162,9 +156,13 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, sg_cnt -= nents; for (j = 0; j < nents; j++) sg = sg_next(sg); + prev = reg; offset = 0; } + if (prev) + prev->wr.wr.next = NULL; + ctx->type = RDMA_RW_MR; return count; @@ -181,7 +179,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; - u32 max_sge = rdma_rw_max_sge(dev, dir); + u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge : + qp->max_read_sge; struct ib_sge *sge; u32 total_len = 0, i, j; @@ -205,11 +204,10 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.opcode = IB_WR_RDMA_READ; rdma_wr->remote_addr = remote_addr + total_len; rdma_wr->rkey = rkey; + rdma_wr->wr.num_sge = nr_sge; rdma_wr->wr.sg_list = sge; for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { - rdma_wr->wr.num_sge++; - sge->addr = ib_sg_dma_address(dev, sg) + offset; sge->length = ib_sg_dma_len(dev, sg) - offset; sge->lkey = qp->pd->local_dma_lkey; @@ -220,8 +218,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, offset = 0; } - if (i + 1 < ctx->nr_ops) - rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr; + rdma_wr->wr.next = i + 1 < ctx->nr_ops ? + &ctx->map.wrs[i + 1].wr : NULL; } ctx->type = RDMA_RW_MULTI_WR; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e95538650dc6..b9bf7aa055e7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -65,10 +65,17 @@ struct ib_sa_sm_ah { u8 src_path_mask; }; +struct ib_sa_classport_cache { + bool valid; + struct ib_class_port_info data; +}; + struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; + struct ib_sa_classport_cache classport_info; + spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u8 port_num; }; @@ -998,6 +1005,13 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); + if (event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_LID_CHANGE) { + spin_lock_irqsave(&port->classport_lock, flags); + port->classport_info.valid = false; + spin_unlock_irqrestore(&port->classport_lock, flags); + } queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } @@ -1719,6 +1733,7 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { + unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); @@ -1728,6 +1743,16 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, ib_unpack(classport_info_rec_table, ARRAY_SIZE(classport_info_rec_table), mad->data, &rec); + + spin_lock_irqsave(&sa_query->port->classport_lock, flags); + if (!status && !sa_query->port->classport_info.valid) { + memcpy(&sa_query->port->classport_info.data, &rec, + sizeof(sa_query->port->classport_info.data)); + + sa_query->port->classport_info.valid = true; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); + query->callback(status, &rec, query->context); } else { query->callback(status, NULL, query->context); @@ -1754,7 +1779,9 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; + struct ib_class_port_info cached_class_port_info; int ret; + unsigned long flags; if (!sa_dev) return -ENODEV; @@ -1762,6 +1789,17 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; + /* Use cached ClassPortInfo attribute if valid instead of sending mad */ + spin_lock_irqsave(&port->classport_lock, flags); + if (port->classport_info.valid && callback) { + memcpy(&cached_class_port_info, &port->classport_info.data, + sizeof(cached_class_port_info)); + spin_unlock_irqrestore(&port->classport_lock, flags); + callback(0, &cached_class_port_info, context); + return 0; + } + spin_unlock_irqrestore(&port->classport_lock, flags); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -1885,6 +1923,9 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; + spin_lock_init(&sa_dev->port[i].classport_lock); + sa_dev->port[i].classport_info.valid = false; + sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 60df4f8e81be..15defefecb4f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -38,6 +38,7 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/netdevice.h> +#include <linux/ethtool.h> #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> @@ -1200,16 +1201,28 @@ static ssize_t set_node_desc(struct device *device, return count; } +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + ib_get_device_fw_str(dev, buf, PAGE_SIZE); + strlcat(buf, "\n", PAGE_SIZE); + return strlen(buf); +} + static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, - &dev_attr_node_desc + &dev_attr_node_desc, + &dev_attr_fw_ver, }; static void free_port_list_attributes(struct ib_device *device) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index c0f3826abb30..2825ece91d3c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -106,6 +106,7 @@ struct ucma_multicast { int events_reported; u64 uid; + u8 join_state; struct list_head list; struct sockaddr_storage addr; }; @@ -1317,12 +1318,20 @@ static ssize_t ucma_process_join(struct ucma_file *file, struct ucma_multicast *mc; struct sockaddr *addr; int ret; + u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; - if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; + + if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) + join_state = BIT(FULLMEMBER_JOIN); + else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) + join_state = BIT(SENDONLY_FULLMEMBER_JOIN); + else return -EINVAL; ctx = ucma_get_ctx(file, cmd->id); @@ -1335,10 +1344,11 @@ static ssize_t ucma_process_join(struct ucma_file *file, ret = -ENOMEM; goto err1; } - + mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); + ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, + join_state, mc); if (ret) goto err2; @@ -1382,7 +1392,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file, join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); - join_cmd.reserved = 0; + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe4d2e1a8b58..c68746ce6624 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,7 +37,6 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/hugetlb.h> -#include <linux/dma-attrs.h> #include <linux/slab.h> #include <rdma/ib_umem_odp.h> @@ -92,12 +91,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, unsigned long npages; int ret; int i; - DEFINE_DMA_ATTRS(attrs); + unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; int need_release = 0; if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + dma_attrs |= DMA_ATTR_WRITE_BARRIER; if (!size) return ERR_PTR(-EINVAL); @@ -215,7 +214,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->sg_head.sgl, umem->npages, DMA_BIDIRECTIONAL, - &attrs); + dma_attrs); if (umem->nmap <= 0) { ret = -ENOMEM; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 612ccfd39bf9..df26a741cda6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -116,6 +116,7 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; struct mutex mutex; + struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; @@ -162,6 +163,10 @@ struct ib_uqp_object { struct ib_uxrcd_object *uxrcd; }; +struct ib_uwq_object { + struct ib_uevent_object uevent; +}; + struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; @@ -181,6 +186,8 @@ extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; +extern struct idr ib_uverbs_wq_idr; +extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -199,6 +206,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); @@ -219,6 +227,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; @@ -275,5 +284,10 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); +IB_UVERBS_DECLARE_EX_CMD(create_wq); +IB_UVERBS_DECLARE_EX_CMD(modify_wq); +IB_UVERBS_DECLARE_EX_CMD(destroy_wq); +IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 825021d1008b..f6647318138d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -57,6 +57,8 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; +static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -243,6 +245,27 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); +} + +static void put_wq_read(struct ib_wq *wq) +{ + put_uobj_read(wq->uobject); +} + +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); +} + +static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +{ + put_uobj_read(ind_table->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -326,6 +349,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->wq_list); + INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -1750,6 +1775,8 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; + struct ib_rwq_ind_table *ind_tbl = NULL; + bool has_sq = true; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; @@ -1761,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); + if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + + sizeof(cmd->rwq_ind_tbl_handle) && + (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, + file->ucontext); + if (!ind_tbl) { + ret = -EINVAL; + goto err_put; + } + + attr.rwq_ind_tbl = ind_tbl; + } + + if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + + sizeof(cmd->reserved1)) && cmd->reserved1) { + ret = -EOPNOTSUPP; + goto err_put; + } + + if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { + ret = -EINVAL; + goto err_put; + } + + if (ind_tbl && !cmd->max_send_wr) + has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, @@ -1784,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file, } } - if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); - if (!rcq) { - ret = -EINVAL; - goto err_put; + if (!ind_tbl) { + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } } - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); - rcq = rcq ?: scq; + if (has_sq) + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + if (!ind_tbl) + rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd || !scq) { + if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; } @@ -1864,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; + qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); + if (ind_tbl) + atomic_inc(&ind_tbl->usecnt); } qp->uobject = &obj->uevent.uobject; @@ -1913,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file, put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1940,6 +2003,8 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); put_uobj_write(&obj->uevent.uobject); return ret; @@ -2033,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (err) return err; - if (cmd.comp_mask) + if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; if (cmd.reserved) @@ -3040,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, sizeof(struct ib_flow_ipv4_filter)); break; + case IB_FLOW_SPEC_IPV6: + ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); + if (ib_spec->ipv6.size != kern_spec->ipv6.size) + return -EINVAL; + memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, + sizeof(struct ib_flow_ipv6_filter)); + memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, + sizeof(struct ib_flow_ipv6_filter)); + break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); @@ -3056,6 +3130,445 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, return 0; } +int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq_resp resp = {}; + struct ib_uwq_object *obj; + int err = 0; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_wq *wq; + struct ib_wq_init_attr wq_init_attr = {}; + size_t required_cmd_sz; + size_t required_resp_len; + + required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); + required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, + &wq_lock_class); + down_write(&obj->uevent.uobject.mutex); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + err = -EINVAL; + goto err_uobj; + } + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + err = -EINVAL; + goto err_put_pd; + } + + wq_init_attr.cq = cq; + wq_init_attr.max_sge = cmd.max_sge; + wq_init_attr.max_wr = cmd.max_wr; + wq_init_attr.wq_context = file; + wq_init_attr.wq_type = cmd.wq_type; + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + if (IS_ERR(wq)) { + err = PTR_ERR(wq); + goto err_put_cq; + } + + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + if (err) + goto destroy_wq; + + memset(&resp, 0, sizeof(resp)); + resp.wq_handle = obj->uevent.uobject.id; + resp.max_sge = wq_init_attr.max_sge; + resp.max_wr = wq_init_attr.max_wr; + resp.wqn = wq->wq_num; + resp.response_length = required_resp_len; + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + put_pd_read(pd); + put_cq_read(cq); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); +destroy_wq: + ib_destroy_wq(wq); +err_put_cq: + put_cq_read(cq); +err_put_pd: + put_pd_read(pd); +err_uobj: + put_uobj_write(&obj->uevent.uobject); + + return err; +} + +int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq_resp resp = {}; + struct ib_wq *wq; + struct ib_uobject *uobj; + struct ib_uwq_object *obj; + size_t required_cmd_sz; + size_t required_resp_len; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); + required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + resp.response_length = required_resp_len; + uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + + wq = uobj->object; + obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); + ret = ib_destroy_wq(wq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + resp.events_reported = obj->uevent.events_reported; + put_uobj(uobj); + + ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (ret) + return ret; + + return 0; +} + +int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_wq *wq; + struct ib_wq_attr wq_attr = {}; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask) + return -EINVAL; + + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + return -EINVAL; + + wq = idr_read_wq(cmd.wq_handle, file->ucontext); + if (!wq) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + wq_attr.wq_state = cmd.wq_state; + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); + put_wq_read(wq); + return ret; +} + +int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; + struct ib_uobject *uobj; + int err = 0; + struct ib_rwq_ind_table_init_attr init_attr = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_wq **wqs = NULL; + u32 *wqs_handles = NULL; + struct ib_wq *wq = NULL; + int i, j, num_read_wqs; + u32 num_wq_handles; + u32 expected_in_size; + size_t required_cmd_sz_header; + size_t required_resp_len; + + required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); + required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); + + if (ucore->inlen < required_cmd_sz_header) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + if (err) + return err; + + ucore->inbuf += required_cmd_sz_header; + ucore->inlen -= required_cmd_sz_header; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) + return -EINVAL; + + num_wq_handles = 1 << cmd.log_ind_tbl_size; + expected_in_size = num_wq_handles * sizeof(__u32); + if (num_wq_handles == 1) + /* input size for wq handles is u64 aligned */ + expected_in_size += sizeof(__u32); + + if (ucore->inlen < expected_in_size) + return -EINVAL; + + if (ucore->inlen > expected_in_size && + !ib_is_udata_cleared(ucore, expected_in_size, + ucore->inlen - expected_in_size)) + return -EOPNOTSUPP; + + wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), + GFP_KERNEL); + if (!wqs_handles) + return -ENOMEM; + + err = ib_copy_from_udata(wqs_handles, ucore, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); + if (!wqs) { + err = -ENOMEM; + goto err_free; + } + + for (num_read_wqs = 0; num_read_wqs < num_wq_handles; + num_read_wqs++) { + wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + if (!wq) { + err = -EINVAL; + goto put_wqs; + } + + wqs[num_read_wqs] = wq; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto put_wqs; + } + + init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); + down_write(&uobj->mutex); + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + if (IS_ERR(rwq_ind_tbl)) { + err = PTR_ERR(rwq_ind_tbl); + goto err_uobj; + } + + rwq_ind_tbl->ind_tbl = wqs; + rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; + rwq_ind_tbl->uobject = uobj; + uobj->object = rwq_ind_tbl; + rwq_ind_tbl->device = ib_dev; + atomic_set(&rwq_ind_tbl->usecnt, 0); + + for (i = 0; i < num_wq_handles; i++) + atomic_inc(&wqs[i]->usecnt); + + err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + if (err) + goto destroy_ind_tbl; + + resp.ind_tbl_handle = uobj->id; + resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; + resp.response_length = required_resp_len; + + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + kfree(wqs_handles); + + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); +destroy_ind_tbl: + ib_destroy_rwq_ind_table(rwq_ind_tbl); +err_uobj: + put_uobj_write(uobj); +put_wqs: + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); +err_free: + kfree(wqs_handles); + kfree(wqs); + return err; +} + +int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_uobject *uobj; + int ret; + struct ib_wq **ind_tbl; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + rwq_ind_tbl = uobj->object; + ind_tbl = rwq_ind_tbl->ind_tbl; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + kfree(ind_tbl); + return ret; +} + int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 31f422a70623..0012fa58c105 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,8 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_wq_idr); +DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -130,6 +132,11 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, + [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, + [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, + [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, + [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -265,6 +272,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { + struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { + struct ib_wq *wq = uobj->object; + struct ib_uwq_object *uwq = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + ib_destroy_wq(wq); + ib_uverbs_release_uevent(file, &uwq->uevent); + kfree(uwq); + } + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = @@ -568,6 +596,16 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; @@ -931,6 +969,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); + mutex_init(&file->cleanup_mutex); filp->private_data = file; kobject_get(&dev->kobj); @@ -956,18 +995,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; - struct ib_ucontext *ucontext = NULL; + + mutex_lock(&file->cleanup_mutex); + if (file->ucontext) { + ib_uverbs_cleanup_ucontext(file, file->ucontext); + file->ucontext = NULL; + } + mutex_unlock(&file->cleanup_mutex); mutex_lock(&file->device->lists_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; if (!file->is_closed) { list_del(&file->list); file->is_closed = 1; } mutex_unlock(&file->device->lists_mutex); - if (ucontext) - ib_uverbs_cleanup_ucontext(file, ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); @@ -1181,22 +1222,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { struct ib_ucontext *ucontext; - file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; - ucontext = file->ucontext; list_del(&file->list); - file->ucontext = NULL; kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); - /* We must release the mutex before going ahead and calling - * disassociate_ucontext. disassociate_ucontext might end up - * indirectly calling uverbs_close, for example due to freeing - * the resources (e.g mmput). - */ + ib_uverbs_event_handler(&file->event_handler, &event); + + mutex_lock(&file->cleanup_mutex); + ucontext = file->ucontext; + file->ucontext = NULL; + mutex_unlock(&file->cleanup_mutex); + + /* At this point ib_uverbs_close cannot be running + * ib_uverbs_cleanup_ucontext + */ if (ucontext) { + /* We must release the mutex before going ahead and + * calling disassociate_ucontext. disassociate_ucontext + * might end up indirectly calling uverbs_close, + * for example due to freeing the resources + * (e.g mmput). + */ ib_dev->disassociate_ucontext(ucontext); ib_uverbs_cleanup_ucontext(file, ucontext); } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6298f54b4137..f2b776efab3a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -758,6 +758,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp *qp; int ret; + if (qp_init_attr->rwq_ind_tbl && + (qp_init_attr->recv_cq || + qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || + qp_init_attr->cap.max_recv_sge)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -775,6 +781,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; atomic_set(&qp->usecnt, 0); qp->mrs_used = 0; @@ -792,7 +799,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->srq = NULL; } else { qp->recv_cq = qp_init_attr->recv_cq; - atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->recv_cq) + atomic_inc(&qp_init_attr->recv_cq->usecnt); qp->srq = qp_init_attr->srq; if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); @@ -803,7 +811,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = NULL; atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->send_cq) + atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->rwq_ind_tbl) + atomic_inc(&qp->rwq_ind_tbl->usecnt); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); @@ -814,6 +825,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } } + /* + * Note: all hw drivers guarantee that max_send_sge is lower than + * the device RDMA WRITE SGE limit but not all hw drivers ensure that + * max_send_sge <= max_sge_rd. + */ + qp->max_write_sge = qp_init_attr->cap.max_send_sge; + qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, + device->attrs.max_sge_rd); + return qp; } EXPORT_SYMBOL(ib_create_qp); @@ -1283,6 +1303,7 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; + struct ib_rwq_ind_table *ind_tbl; int ret; WARN_ON_ONCE(qp->mrs_used > 0); @@ -1297,6 +1318,7 @@ int ib_destroy_qp(struct ib_qp *qp) scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + ind_tbl = qp->rwq_ind_tbl; if (!qp->uobject) rdma_rw_cleanup_mrs(qp); @@ -1311,6 +1333,8 @@ int ib_destroy_qp(struct ib_qp *qp) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); + if (ind_tbl) + atomic_dec(&ind_tbl->usecnt); } return ret; @@ -1558,6 +1582,150 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) } EXPORT_SYMBOL(ib_dealloc_xrcd); +/** + * ib_create_wq - Creates a WQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the WQ. + * @wq_init_attr: A list of initial attributes required to create the + * WQ. If WQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created WQ. + * + * wq_init_attr->max_wr and wq_init_attr->max_sge determine + * the requested size of the WQ, and set to the actual values allocated + * on return. + * If ib_create_wq() succeeds, then max_wr and max_sge will always be + * at least as large as the requested values. + */ +struct ib_wq *ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *wq_attr) +{ + struct ib_wq *wq; + + if (!pd->device->create_wq) + return ERR_PTR(-ENOSYS); + + wq = pd->device->create_wq(pd, wq_attr, NULL); + if (!IS_ERR(wq)) { + wq->event_handler = wq_attr->event_handler; + wq->wq_context = wq_attr->wq_context; + wq->wq_type = wq_attr->wq_type; + wq->cq = wq_attr->cq; + wq->device = pd->device; + wq->pd = pd; + wq->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_inc(&wq_attr->cq->usecnt); + atomic_set(&wq->usecnt, 0); + } + return wq; +} +EXPORT_SYMBOL(ib_create_wq); + +/** + * ib_destroy_wq - Destroys the specified WQ. + * @wq: The WQ to destroy. + */ +int ib_destroy_wq(struct ib_wq *wq) +{ + int err; + struct ib_cq *cq = wq->cq; + struct ib_pd *pd = wq->pd; + + if (atomic_read(&wq->usecnt)) + return -EBUSY; + + err = wq->device->destroy_wq(wq); + if (!err) { + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + } + return err; +} +EXPORT_SYMBOL(ib_destroy_wq); + +/** + * ib_modify_wq - Modifies the specified WQ. + * @wq: The WQ to modify. + * @wq_attr: On input, specifies the WQ attributes to modify. + * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ + * are being modified. + * On output, the current values of selected WQ attributes are returned. + */ +int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask) +{ + int err; + + if (!wq->device->modify_wq) + return -ENOSYS; + + err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + return err; +} +EXPORT_SYMBOL(ib_modify_wq); + +/* + * ib_create_rwq_ind_table - Creates a RQ Indirection Table. + * @device: The device on which to create the rwq indirection table. + * @ib_rwq_ind_table_init_attr: A list of initial attributes required to + * create the Indirection Table. + * + * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less + * than the created ib_rwq_ind_table object and the caller is responsible + * for its memory allocation/free. + */ +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr) +{ + struct ib_rwq_ind_table *rwq_ind_table; + int i; + u32 table_size; + + if (!device->create_rwq_ind_table) + return ERR_PTR(-ENOSYS); + + table_size = (1 << init_attr->log_ind_tbl_size); + rwq_ind_table = device->create_rwq_ind_table(device, + init_attr, NULL); + if (IS_ERR(rwq_ind_table)) + return rwq_ind_table; + + rwq_ind_table->ind_tbl = init_attr->ind_tbl; + rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; + rwq_ind_table->device = device; + rwq_ind_table->uobject = NULL; + atomic_set(&rwq_ind_table->usecnt, 0); + + for (i = 0; i < table_size; i++) + atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); + + return rwq_ind_table; +} +EXPORT_SYMBOL(ib_create_rwq_ind_table); + +/* + * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. + * @wq_ind_table: The Indirection Table to destroy. +*/ +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) +{ + int err, i; + u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); + struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; + + if (atomic_read(&rwq_ind_table->usecnt)) + return -EBUSY; + + err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + if (!err) { + for (i = 0; i < table_size; i++) + atomic_dec(&ind_tbl[i]->usecnt); + } + + return err; +} +EXPORT_SYMBOL(ib_destroy_rwq_ind_table); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3e8431b5cad7..04bbf172abde 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1396,10 +1396,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) state_set(&child_ep->com, CONNECTING); child_ep->com.tdev = tdev; child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = PF_INET; + child_ep->com.local_addr.sin_family = AF_INET; child_ep->com.local_addr.sin_port = req->local_port; child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = PF_INET; + child_ep->com.remote_addr.sin_family = AF_INET; child_ep->com.remote_addr.sin_port = req->peer_port; child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; get_ep(&parent_ep->com); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index bb1a839d4d6d..3edb80644b53 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - PDBG("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.fw_version); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + + PDBG("%s dev 0x%p\n", __func__, iwch_dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + snprintf(str, str_len, "%s", info.fw_version); +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = iwch_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3a67216bce6..3aca7f6171b4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) return; } +static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) +{ + struct sk_buff *skb; + unsigned int i; + size_t len; + + len = roundup(sizeof(union cpl_wr_size), 16); + for (i = 0; i < size; i++) { + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + goto fail; + skb_queue_tail(ep_skb_list, skb); + } + return 0; +fail: + skb_queue_purge(ep_skb_list); + return -ENOMEM; +} + static void *alloc_ep(int size, gfp_t gfp) { struct c4iw_ep_common *epc; @@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref) if (ep->mpa_skb) kfree_skb(ep->mpa_skb); } + if (!skb_queue_empty(&ep->com.ep_skb_list)) + skb_queue_purge(&ep->com.ep_skb_list); kfree(ep); } @@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) } } -static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +static int send_flowc(struct c4iw_ep *ep) { - unsigned int flowclen = 80; struct fw_flowc_wr *flowc; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int i; u16 vlan = ep->l2t->vlan; int nparams; + if (WARN_ON(!skb)) + return -ENOMEM; + if (vlan == CPL_L2T_VLAN_NONE) nparams = 8; else nparams = 9; - skb = get_skb(skb, flowclen, GFP_KERNEL); - flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, 16)) | FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; @@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) return c4iw_ofld_send(&ep->com.dev->rdev, skb); } -static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) +static int send_halfclose(struct c4iw_ep *ep) { struct cpl_close_con_req *req; - struct sk_buff *skb; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int wrlen = roundup(sizeof *req, 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(NULL, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + if (WARN_ON(!skb)) return -ENOMEM; - } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); req = (struct cpl_close_con_req *) skb_put(skb, wrlen); @@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +static int send_abort(struct c4iw_ep *ep) { struct cpl_abort_req *req; int wrlen = roundup(sizeof *req, 16); + struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(skb, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb.\n", - __func__); + if (WARN_ON(!req_skb)) return -ENOMEM; - } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *) skb_put(skb, wrlen); + + set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); + req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static void best_mtu(const unsigned short *mtus, unsigned short mtu, @@ -992,9 +1011,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, mpa = (struct mpa_message *)(req + 1); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0) | - (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + + mpa->flags = 0; + if (crc_enabled) + mpa->flags |= MPA_CRC; + if (markers_enabled) { + mpa->flags |= MPA_MARKERS; + ep->mpa_attr.recv_marker_enabled = 1; + } else { + ep->mpa_attr.recv_marker_enabled = 0; + } + if (mpa_rev_to_use == 2) + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; if (mpa_rev_to_use == 1) { @@ -1169,8 +1198,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) mpa = (struct mpa_message *)(req + 1); memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); + mpa->flags = 0; + if (ep->mpa_attr.crc_enabled) + mpa->flags |= MPA_CRC; + if (ep->mpa_attr.recv_marker_enabled) + mpa->flags |= MPA_MARKERS; mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); @@ -1248,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ - ret = send_flowc(ep, NULL); + ret = send_flowc(ep); if (ret) goto err; if (ep->retry_with_mpa_v1) @@ -1555,7 +1587,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) */ __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa->revision; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; @@ -2004,12 +2035,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) } /* - * Return whether a failed active open has allocated a TID + * Some of the error codes above implicitly indicate that there is no TID + * allocated with the result of an ACT_OPEN. We use this predicate to make + * that explicit. */ static inline int act_open_has_tid(int status) { - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; + return (status != CPL_ERR_TCAM_PARITY && + status != CPL_ERR_TCAM_MISS && + status != CPL_ERR_TCAM_FULL && + status != CPL_ERR_CONN_EXIST_SYNRECV && + status != CPL_ERR_CONN_EXIST); } /* Returns whether a CPL status conveys negative advice. @@ -2130,6 +2166,7 @@ out: static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; + int size = 0; struct sockaddr_in *laddr = (struct sockaddr_in *) &ep->com.cm_id->m_local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *) @@ -2145,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) init_timer(&ep->timer); c4iw_init_wr_wait(&ep->com.wr_wait); + /* When MPA revision is different on nodes, the node with MPA_rev=2 + * tries to reconnect with MPA_rev 1 for the same EP through + * c4iw_reconnect(), where the same EP is assigned with new tid for + * further connection establishment. As we are using the same EP pointer + * for reconnect, few skbs are used during the previous c4iw_connect(), + * which leaves the EP with inadequate skbs for further + * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty + * skb_list() during peer_abort(). Allocate skbs which is already used. + */ + size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { + err = -ENOMEM; + goto fail1; + } + /* * Allocate an active TID to initiate a TCP connection. */ @@ -2210,6 +2262,7 @@ fail2: * response of 1st connect request. */ connect_reply_upcall(ep, -ECONNRESET); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -2576,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) child_ep->mtu = peer_mss + hdrs; + skb_queue_head_init(&child_ep->com.ep_skb_list); + if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) + goto fail; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -2640,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) (const u32 *)&sin6->sin6_addr.s6_addr, 1); } goto out; +fail: + c4iw_put_ep(&child_ep->com); reject: reject_cr(dev, hwtid, skb); if (parent_ep) @@ -2670,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->com.state = MPA_REQ_WAIT; start_ep_timer(ep); set_bit(PASS_ESTAB, &ep->com.history); - ret = send_flowc(ep, skb); + ret = send_flowc(ep); mutex_unlock(&ep->com.mutex); if (ret) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); @@ -2871,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } mutex_unlock(&ep->com.mutex); - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - printk(KERN_ERR MOD "%s - cannot allocate skb!\n", - __func__); + rpl_skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!rpl_skb)) { release = 1; goto out; } @@ -3011,9 +3068,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s last streaming msg ack ep %p tid %u state %u " "initiator %u freeing skb\n", __func__, ep, ep->hwtid, state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); + mutex_lock(&ep->com.mutex); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; - mutex_lock(&ep->com.mutex); if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) stop_ep_timer(ep); mutex_unlock(&ep->com.mutex); @@ -3025,9 +3082,9 @@ out: int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err = 0; - int disconnect = 0; + int abort; struct c4iw_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); @@ -3038,16 +3095,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) } set_bit(ULP_REJECT, &ep->com.history); if (mpa_rev == 0) - disconnect = 2; - else { - err = send_mpa_reject(ep, pdata, pdata_len); - disconnect = 1; - } + abort = 1; + else + abort = send_mpa_reject(ep, pdata, pdata_len); mutex_unlock(&ep->com.mutex); - if (disconnect) { - stop_ep_timer(ep); - err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); - } + + stop_ep_timer(ep); + c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -3248,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto out; } + + skb_queue_head_init(&ep->com.ep_skb_list); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { + err = -ENOMEM; + goto fail1; + } + init_timer(&ep->timer); ep->plen = conn_param->private_data_len; if (ep->plen) @@ -3266,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->com.qp) { PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); err = -EINVAL; - goto fail1; + goto fail2; } ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, @@ -3279,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->atid == -1) { printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; - goto fail1; + goto fail2; } insert_handle(dev, &dev->atid_idr, ep, ep->atid); @@ -3303,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3323,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3339,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; - goto fail2; + goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail3; + goto fail4; } PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", @@ -3362,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; cxgb4_l2t_release(ep->l2t); -fail3: +fail4: dst_release(ep->dst); -fail2: +fail3: remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail1: +fail2: + skb_queue_purge(&ep->com.ep_skb_list); deref_cm_id(&ep->com); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -3461,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) err = -ENOMEM; goto fail1; } + skb_queue_head_init(&ep->com.ep_skb_list); PDBG("%s ep %p\n", __func__, ep); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); @@ -3577,11 +3641,22 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: + case CONNECTING: close = 1; if (abrupt) ep->com.state = ABORTING; else { ep->com.state = CLOSING; + + /* + * if we close before we see the fw4_ack() then we fix + * up the timer state since we're reusing it. + */ + if (ep->mpa_skb && + test_bit(STOP_MPA_TIMER, &ep->com.flags)) { + clear_bit(STOP_MPA_TIMER, &ep->com.flags); + stop_ep_timer(ep); + } start_ep_timer(ep); } set_bit(CLOSE_SENT, &ep->com.flags); @@ -3611,10 +3686,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep, -ECONNRESET); - ret = send_abort(ep, NULL, gfp); + ret = send_abort(ep); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); - ret = send_halfclose(ep, gfp); + ret = send_halfclose(ep); } if (ret) { set_bit(EP_DISC_FAIL, &ep->com.history); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b0b955724458..812ab7278b8e 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -33,19 +33,15 @@ #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; - struct sk_buff *skb; int ret; wr_len = sizeof *res_wr + sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); @@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) : NULL; destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); + ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, + chp->destroy_skb); + chp->destroy_skb = NULL; kfree(chp); return 0; } @@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; - int ret; + int ret, wr_len; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; @@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!chp) return ERR_PTR(-ENOMEM); + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!chp->destroy_skb) { + ret = -ENOMEM; + goto err1; + } + if (ib_context) ucontext = to_c4iw_ucontext(ib_context); @@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) - goto err1; + goto err2; chp->rhp = rhp; chp->cq.size--; /* status page */ @@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); if (ret) - goto err2; + goto err3; if (ucontext) { mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) - goto err3; + goto err4; mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) - goto err4; + goto err5; uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; @@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp) - sizeof(uresp.reserved)); if (ret) - goto err5; + goto err6; mm->key = uresp.key; mm->addr = virt_to_phys(chp->cq.queue); @@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; -err5: +err6: kfree(mm2); -err4: +err5: kfree(mm); -err3: +err4: remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); -err2: +err3: destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->destroy_skb); +err2: + kfree_skb(chp->destroy_skb); err1: kfree(chp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b23d2dd..071d7332ec06 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file) idr_for_each(&qpd->devp->qpidr, count_idrs, &count); spin_unlock_irq(&qpd->devp->lock); - qpd->bufsize = count * 128; + qpd->bufsize = count * 180; qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { kfree(qpd); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a75af27..aa47e0ae80bc 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -384,6 +384,7 @@ struct c4iw_mr { struct ib_mr ibmr; struct ib_umem *umem; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; u64 *mpl; @@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) struct c4iw_mw { struct ib_mw ibmw; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; }; @@ -412,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; + struct sk_buff *destroy_skb; struct t4_cq cq; spinlock_t lock; spinlock_t comp_handler_lock; @@ -472,7 +475,7 @@ struct c4iw_qp { struct t4_wq wq; spinlock_t lock; struct mutex mutex; - atomic_t refcnt; + struct kref kref; wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; @@ -789,10 +792,29 @@ enum c4iw_ep_history { CM_ID_DEREFED = 28, }; +enum conn_pre_alloc_buffers { + CN_ABORT_REQ_BUF, + CN_ABORT_RPL_BUF, + CN_CLOSE_CON_REQ_BUF, + CN_DESTROY_BUF, + CN_FLOWC_BUF, + CN_MAX_CON_BUF +}; + +#define FLOWC_LEN 80 +union cpl_wr_size { + struct cpl_abort_req abrt_req; + struct cpl_abort_rpl abrt_rpl; + struct fw_ri_wr ri_req; + struct cpl_close_con_req close_req; + char flowc_buf[FLOWC_LEN]; +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; struct c4iw_dev *dev; + struct sk_buff_head ep_skb_list; enum c4iw_ep_state state; struct kref kref; struct mutex mutex; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 55d0651ee4de..0b91b0f4df71 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) } static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, - u32 len, dma_addr_t data, int wait) + u32 len, dma_addr_t data, + int wait, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_sgl *sgl; u8 wr_len; @@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, c4iw_init_wr_wait(&wr_wait); wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, } static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_idata *sc; u8 wr_len, *to_dp, *from_dp; @@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, wr_len = roundup(sizeof *req + sizeof *sc + roundup(copy_len, T4_ULPTX_MIN_IO), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - (copy_len % T4_ULPTX_MIN_IO)); ret = c4iw_ofld_send(rdev, skb); + skb = NULL; if (ret) return ret; len -= C4IW_MAX_INLINE_SIZE; @@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } -static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb) { u32 remain = len; u32 dmalen; @@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * dmalen = T4_ULPTX_MAX_DMA; remain -= dmalen; ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, - !remain); + !remain, skb); if (ret) goto out; addr += dmalen >> 5; @@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * daddr += dmalen; } if (remain) - ret = _c4iw_write_mem_inline(rdev, addr, remain, data); + ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb); out: dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); return ret; @@ -224,23 +229,25 @@ out: * If data is NULL, clear len byte of memory to zero. */ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { if (len > inline_threshold) { - if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) { printk_ratelimited(KERN_WARNING "%s: dma map" " failure (non fatal)\n", pci_name(rdev->lldi.pdev)); return _c4iw_write_mem_inline(rdev, addr, len, - data); - } else + data, skb); + } else { return 0; + } } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, + len, data, skb); } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, len, data, skb); } /* @@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, int bind_enabled, u32 zbva, u64 to, - u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr, + struct sk_buff *skb) { int err; struct fw_ri_tpte tpt; @@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt); + sizeof(tpt), &tpt, skb); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, __func__, pbl_addr, rdev->lldi.vr->pbl.start, pbl_size); - err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL); return err; } static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr) + u32 pbl_addr, struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, - pbl_size, pbl_addr); + pbl_size, pbl_addr, skb); } static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0); + 0UL, 0, 0, 0, 0, NULL); } -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) +static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, + struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0); + 0, skb); } static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, @@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, - 0UL, 0, 0, pbl_size, pbl_addr); + 0UL, 0, 0, pbl_size, pbl_addr, NULL); } static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) @@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, mhp->attr.mw_bind_enable, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len ? mhp->attr.len : -1, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr); + mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL); if (ret) return ret; ret = finish_mem_reg(mhp, stag); - if (ret) + if (ret) { dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); + mhp->dereg_skb = NULL; + } return ret; } @@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto err0; + } + mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.perms = c4iw_ib_to_tpt_access(acc); @@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0, + NULL); if (ret) goto err1; @@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) return &mhp->ibmr; err2: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err1: + kfree_skb(mhp->dereg_skb); +err0: kfree(mhp); return ERR_PTR(ret); } @@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + mhp->rhp = rhp; mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -550,6 +577,7 @@ err_pbl: err: ib_umem_release(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -572,11 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); - ret = allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); + + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto free_mhp; } + + ret = allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) + goto free_skb; mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_MW; @@ -584,12 +617,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mmid = (stag) >> 8; mhp->ibmw.rkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { - deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto dealloc_win; } PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); + +dealloc_win: + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); +free_skb: + kfree_skb(mhp->dereg_skb); +free_mhp: + kfree(mhp); + return ERR_PTR(ret); } int c4iw_dealloc_mw(struct ib_mw *mw) @@ -602,7 +642,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; remove_handle(rhp, &rhp->mmidr, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); + kfree_skb(mhp->dereg_skb); kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); return 0; @@ -666,7 +707,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, return &(mhp->ibmr); err3: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); @@ -717,7 +758,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index dd8a86b726d2..df127ce6b6ec 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); - PDBG("%s dev 0x%p\n", __func__, dev); - - return sprintf(buf, "%u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *c4iw_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev); + PDBG("%s dev 0x%p\n", __func__, dev); + + snprintf(str, str_len, "%u.%u.%u.%u", + FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; dev->ibdev.drain_sq = c4iw_drain_sq; dev->ibdev.drain_rq = c4iw_drain_rq; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e8993e49b8b3..edb1172b6f54 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, return 0; } +void _free_qp(struct kref *kref) +{ + struct c4iw_qp *qhp; + + qhp = container_of(kref, struct c4iw_qp, kref); + PDBG("%s qhp %p\n", __func__, qhp); + kfree(qhp); +} + void c4iw_qp_add_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_c4iw_qp(qp)->refcnt)); + kref_get(&to_c4iw_qp(qp)->kref); } void c4iw_qp_rem_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) - wake_up(&(to_c4iw_qp(qp)->wait)); + kref_put(&to_c4iw_qp(qp)->kref, _free_qp); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1081,9 +1089,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); - skb = alloc_skb(sizeof *wqe, gfp); - if (!skb) + skb = skb_dequeue(&qhp->ep->com.ep_skb_list); + if (WARN_ON(!skb)) return; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1202,9 +1211,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL); - if (!skb) + skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!skb)) return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1592,8 +1602,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) wait_event(qhp->wait, !qhp->ep); remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); spin_lock_irq(&rhp->lock); if (!list_empty(&qhp->db_fc_entry)) @@ -1606,8 +1614,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + c4iw_qp_rem_ref(ib_qp); + PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); - kfree(qhp); return 0; } @@ -1704,7 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); + kref_init(&qhp->kref); ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) @@ -1896,12 +1905,20 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static void move_qp_to_err(struct c4iw_qp *qp) +{ + struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR }; + + (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); +} + void c4iw_drain_sq(struct ib_qp *ibqp) { struct c4iw_qp *qp = to_c4iw_qp(ibqp); unsigned long flag; bool need_to_wait; + move_qp_to_err(qp); spin_lock_irqsave(&qp->lock, flag); need_to_wait = !t4_sq_empty(&qp->wq); spin_unlock_irqrestore(&qp->lock, flag); @@ -1916,6 +1933,7 @@ void c4iw_drain_rq(struct ib_qp *ibqp) unsigned long flag; bool need_to_wait; + move_qp_to_err(qp); spin_lock_irqsave(&qp->lock, flag); need_to_wait = !t4_rq_empty(&qp->wq); spin_unlock_irqrestore(&qp->lock, flag); diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index a925fb0db706..f6ea0881765a 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,9 +1,9 @@ config INFINIBAND_HFI1 tristate "Intel OPA Gen1 support" - depends on X86_64 && INFINIBAND_RDMAVT + depends on X86_64 && INFINIBAND_RDMAVT && I2C select MMU_NOTIFIER select CRC32 - default m + select I2C_ALGOBIT ---help--- This is a low-level driver for Intel OPA Gen1 adapter. config HFI1_DEBUG_SDMA_ORDER diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 9b5382c94b0c..0cf97a09b64b 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ - qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ + qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 14d7eeb09be6..79575ee873f2 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,12 +47,18 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/cpumask.h> #include "hfi.h" #include "affinity.h" #include "sdma.h" #include "trace.h" +struct hfi1_affinity_node_list node_affinity = { + .list = LIST_HEAD_INIT(node_affinity.list), + .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), +}; + /* Name of IRQ types, indexed by enum irq_type */ static const char * const irq_type_names[] = { "SDMA", @@ -61,6 +67,9 @@ static const char * const irq_type_names[] = { "OTHER", }; +/* Per NUMA node count of HFI devices */ +static unsigned int *hfi1_per_node_cntr; + static inline void init_cpu_mask_set(struct cpu_mask_set *set) { cpumask_clear(&set->mask); @@ -69,47 +78,136 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set) } /* Initialize non-HT cpu cores mask */ -int init_real_cpu_mask(struct hfi1_devdata *dd) +void init_real_cpu_mask(void) { - struct hfi1_affinity *info; int possible, curr_cpu, i, ht; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - cpumask_clear(&info->real_cpu_mask); + cpumask_clear(&node_affinity.real_cpu_mask); /* Start with cpu online mask as the real cpu mask */ - cpumask_copy(&info->real_cpu_mask, cpu_online_mask); + cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask); /* * Remove HT cores from the real cpu mask. Do this in two steps below. */ - possible = cpumask_weight(&info->real_cpu_mask); + possible = cpumask_weight(&node_affinity.real_cpu_mask); ht = cpumask_weight(topology_sibling_cpumask( - cpumask_first(&info->real_cpu_mask))); + cpumask_first(&node_affinity.real_cpu_mask))); /* * Step 1. Skip over the first N HT siblings and use them as the * "real" cores. Assumes that HT cores are not enumerated in * succession (except in the single core case). */ - curr_cpu = cpumask_first(&info->real_cpu_mask); + curr_cpu = cpumask_first(&node_affinity.real_cpu_mask); for (i = 0; i < possible / ht; i++) - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); /* * Step 2. Remove the remaining HT siblings. Use cpumask_next() to * skip any gaps. */ for (; i < possible; i++) { - cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask); - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); + cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask); + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); + } +} + +int node_affinity_init(void) +{ + int node; + struct pci_dev *dev = NULL; + const struct pci_device_id *ids = hfi1_pci_tbl; + + cpumask_clear(&node_affinity.proc.used); + cpumask_copy(&node_affinity.proc.mask, cpu_online_mask); + + node_affinity.proc.gen = 0; + node_affinity.num_core_siblings = + cpumask_weight(topology_sibling_cpumask( + cpumask_first(&node_affinity.proc.mask) + )); + node_affinity.num_online_nodes = num_online_nodes(); + node_affinity.num_online_cpus = num_online_cpus(); + + /* + * The real cpu mask is part of the affinity struct but it has to be + * initialized early. It is needed to calculate the number of user + * contexts in set_up_context_variables(). + */ + init_real_cpu_mask(); + + hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + sizeof(*hfi1_per_node_cntr), GFP_KERNEL); + if (!hfi1_per_node_cntr) + return -ENOMEM; + + while (ids->vendor) { + dev = NULL; + while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { + node = pcibus_to_node(dev->bus); + if (node < 0) + node = numa_node_id(); + + hfi1_per_node_cntr[node]++; + } + ids++; } - dd->affinity = info; return 0; } +void node_affinity_destroy(void) +{ + struct list_head *pos, *q; + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + list_for_each_safe(pos, q, &node_affinity.list) { + entry = list_entry(pos, struct hfi1_affinity_node, + list); + list_del(pos); + kfree(entry); + } + spin_unlock(&node_affinity.lock); + kfree(hfi1_per_node_cntr); +} + +static struct hfi1_affinity_node *node_affinity_allocate(int node) +{ + struct hfi1_affinity_node *entry; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + entry->node = node; + INIT_LIST_HEAD(&entry->list); + + return entry; +} + +/* + * It appends an entry to the list. + * It *must* be called with node_affinity.lock held. + */ +static void node_affinity_add_tail(struct hfi1_affinity_node *entry) +{ + list_add_tail(&entry->list, &node_affinity.list); +} + +/* It must be called with node_affinity.lock held */ +static struct hfi1_affinity_node *node_affinity_lookup(int node) +{ + struct list_head *pos; + struct hfi1_affinity_node *entry; + + list_for_each(pos, &node_affinity.list) { + entry = list_entry(pos, struct hfi1_affinity_node, list); + if (entry->node == node) + return entry; + } + + return NULL; +} + /* * Interrupt affinity. * @@ -121,10 +219,10 @@ int init_real_cpu_mask(struct hfi1_devdata *dd) * to the node relative 1 as necessary. * */ -void hfi1_dev_affinity_init(struct hfi1_devdata *dd) +int hfi1_dev_affinity_init(struct hfi1_devdata *dd) { int node = pcibus_to_node(dd->pcidev->bus); - struct hfi1_affinity *info = dd->affinity; + struct hfi1_affinity_node *entry; const struct cpumask *local_mask; int curr_cpu, possible, i; @@ -132,56 +230,93 @@ void hfi1_dev_affinity_init(struct hfi1_devdata *dd) node = numa_node_id(); dd->node = node; - spin_lock_init(&info->lock); - - init_cpu_mask_set(&info->def_intr); - init_cpu_mask_set(&info->rcv_intr); - init_cpu_mask_set(&info->proc); - local_mask = cpumask_of_node(dd->node); if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - /* Use the "real" cpu mask of this node as the default */ - cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask); - - /* fill in the receive list */ - possible = cpumask_weight(&info->def_intr.mask); - curr_cpu = cpumask_first(&info->def_intr.mask); - if (possible == 1) { - /* only one CPU, everyone will use it */ - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); - } else { - /* - * Retain the first CPU in the default list for the control - * context. - */ - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); - /* - * Remove the remaining kernel receive queues from - * the default list and add them to the receive list. - */ - for (i = 0; i < dd->n_krcv_queues - 1; i++) { - cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); - if (curr_cpu >= nr_cpu_ids) - break; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + /* + * If this is the first time this NUMA node's affinity is used, + * create an entry in the global affinity structure and initialize it. + */ + if (!entry) { + entry = node_affinity_allocate(node); + if (!entry) { + dd_dev_err(dd, + "Unable to allocate global affinity node\n"); + return -ENOMEM; } - } + init_cpu_mask_set(&entry->def_intr); + init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->general_intr_mask); + /* Use the "real" cpu mask of this node as the default */ + cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, + local_mask); + + /* fill in the receive list */ + possible = cpumask_weight(&entry->def_intr.mask); + curr_cpu = cpumask_first(&entry->def_intr.mask); + + if (possible == 1) { + /* only one CPU, everyone will use it */ + cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask); + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); + } else { + /* + * The general/control context will be the first CPU in + * the default list, so it is removed from the default + * list and added to the general interrupt list. + */ + cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask); + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); + curr_cpu = cpumask_next(curr_cpu, + &entry->def_intr.mask); - cpumask_copy(&info->proc.mask, cpu_online_mask); -} + /* + * Remove the remaining kernel receive queues from + * the default list and add them to the receive list. + */ + for (i = 0; + i < (dd->n_krcv_queues - 1) * + hfi1_per_node_cntr[dd->node]; + i++) { + cpumask_clear_cpu(curr_cpu, + &entry->def_intr.mask); + cpumask_set_cpu(curr_cpu, + &entry->rcv_intr.mask); + curr_cpu = cpumask_next(curr_cpu, + &entry->def_intr.mask); + if (curr_cpu >= nr_cpu_ids) + break; + } -void hfi1_dev_affinity_free(struct hfi1_devdata *dd) -{ - kfree(dd->affinity); + /* + * If there ends up being 0 CPU cores leftover for SDMA + * engines, use the same CPU cores as general/control + * context. + */ + if (cpumask_weight(&entry->def_intr.mask) == 0) + cpumask_copy(&entry->def_intr.mask, + &entry->general_intr_mask); + } + + spin_lock(&node_affinity.lock); + node_affinity_add_tail(entry); + spin_unlock(&node_affinity.lock); + } + + return 0; } int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; - struct cpu_mask_set *set; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set = NULL; struct sdma_engine *sde = NULL; struct hfi1_ctxtdata *rcd = NULL; char extra[64]; @@ -194,22 +329,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + switch (msix->type) { case IRQ_SDMA: sde = (struct sdma_engine *)msix->arg; scnprintf(extra, 64, "engine %u", sde->this_idx); - /* fall through */ + set = &entry->def_intr; + break; case IRQ_GENERAL: - set = &dd->affinity->def_intr; + cpu = cpumask_first(&entry->general_intr_mask); break; case IRQ_RCVCTXT: rcd = (struct hfi1_ctxtdata *)msix->arg; - if (rcd->ctxt == HFI1_CTRL_CTXT) { - set = &dd->affinity->def_intr; - cpu = cpumask_first(&set->mask); - } else { - set = &dd->affinity->rcv_intr; - } + if (rcd->ctxt == HFI1_CTRL_CTXT) + cpu = cpumask_first(&entry->general_intr_mask); + else + set = &entry->rcv_intr; scnprintf(extra, 64, "ctxt %u", rcd->ctxt); break; default: @@ -218,12 +356,12 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) } /* - * The control receive context is placed on a particular CPU, which - * is set above. Skip accounting for it. Everything else finds its - * CPU here. + * The general and control contexts are placed on a particular + * CPU, which is set above. Skip accounting for it. Everything else + * finds its CPU here. */ - if (cpu == -1) { - spin_lock(&dd->affinity->lock); + if (cpu == -1 && set) { + spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -235,7 +373,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&dd->affinity->lock); + spin_unlock(&node_affinity.lock); } switch (msix->type) { @@ -263,43 +401,84 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, { struct cpu_mask_set *set = NULL; struct hfi1_ctxtdata *rcd; + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: + set = &entry->def_intr; + break; case IRQ_GENERAL: - set = &dd->affinity->def_intr; + /* Don't do accounting for general contexts */ break; case IRQ_RCVCTXT: rcd = (struct hfi1_ctxtdata *)msix->arg; - /* only do accounting for non control contexts */ + /* Don't do accounting for control contexts */ if (rcd->ctxt != HFI1_CTRL_CTXT) - set = &dd->affinity->rcv_intr; + set = &entry->rcv_intr; break; default: return; } if (set) { - spin_lock(&dd->affinity->lock); + spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&dd->affinity->lock); + spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); } -int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) +/* This should be called with node_affinity.lock held */ +static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask, + struct hfi1_affinity_node_list *affinity) { - int cpu = -1, ret; - cpumask_var_t diff, mask, intrs; + int possible, curr_cpu, i; + uint num_cores_per_socket = node_affinity.num_online_cpus / + affinity->num_core_siblings / + node_affinity.num_online_nodes; + + cpumask_copy(hw_thread_mask, &affinity->proc.mask); + if (affinity->num_core_siblings > 0) { + /* Removing other siblings not needed for now */ + possible = cpumask_weight(hw_thread_mask); + curr_cpu = cpumask_first(hw_thread_mask); + for (i = 0; + i < num_cores_per_socket * node_affinity.num_online_nodes; + i++) + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); + + for (; i < possible; i++) { + cpumask_clear_cpu(curr_cpu, hw_thread_mask); + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); + } + + /* Identifying correct HW threads within physical cores */ + cpumask_shift_left(hw_thread_mask, hw_thread_mask, + num_cores_per_socket * + node_affinity.num_online_nodes * + hw_thread_no); + } +} + +int hfi1_get_proc_affinity(int node) +{ + int cpu = -1, ret, i; + struct hfi1_affinity_node *entry; + cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, *proc_mask = tsk_cpus_allowed(current); - struct cpu_mask_set *set = &dd->affinity->proc; + struct hfi1_affinity_node_list *affinity = &node_affinity; + struct cpu_mask_set *set = &affinity->proc; /* * check whether process/context affinity has already @@ -325,22 +504,41 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) /* * The process does not have a preset CPU affinity so find one to - * recommend. We prefer CPUs on the same NUMA as the device. + * recommend using the following algorithm: + * + * For each user process that is opening a context on HFI Y: + * a) If all cores are filled, reinitialize the bitmask + * b) Fill real cores first, then HT cores (First set of HT + * cores on all physical cores, then second set of HT core, + * and, so on) in the following order: + * + * 1. Same NUMA node as HFI Y and not running an IRQ + * handler + * 2. Same NUMA node as HFI Y and running an IRQ handler + * 3. Different NUMA node to HFI Y and not running an IRQ + * handler + * 4. Different NUMA node to HFI Y and running an IRQ + * handler + * c) Mark core as filled in the bitmask. As user processes are + * done, clear cores from the bitmask. */ ret = zalloc_cpumask_var(&diff, GFP_KERNEL); if (!ret) goto done; - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL); if (!ret) goto free_diff; - ret = zalloc_cpumask_var(&intrs, GFP_KERNEL); + ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL); + if (!ret) + goto free_hw_thread_mask; + ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL); if (!ret) - goto free_mask; + goto free_available_mask; - spin_lock(&dd->affinity->lock); + spin_lock(&affinity->lock); /* - * If we've used all available CPUs, clear the mask and start + * If we've used all available HW threads, clear the mask and start * overloading. */ if (cpumask_equal(&set->mask, &set->used)) { @@ -348,81 +546,198 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_clear(&set->used); } - /* CPUs used by interrupt handlers */ - cpumask_copy(intrs, (dd->affinity->def_intr.gen ? - &dd->affinity->def_intr.mask : - &dd->affinity->def_intr.used)); - cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? - &dd->affinity->rcv_intr.mask : - &dd->affinity->rcv_intr.used)); + /* + * If NUMA node has CPUs used by interrupt handlers, include them in the + * interrupt handler mask. + */ + entry = node_affinity_lookup(node); + if (entry) { + cpumask_copy(intrs_mask, (entry->def_intr.gen ? + &entry->def_intr.mask : + &entry->def_intr.used)); + cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ? + &entry->rcv_intr.mask : + &entry->rcv_intr.used)); + cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask); + } hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", - cpumask_pr_args(intrs)); + cpumask_pr_args(intrs_mask)); + + cpumask_copy(hw_thread_mask, &set->mask); /* - * If we don't have a NUMA node requested, preference is towards - * device NUMA node + * If HT cores are enabled, identify which HW threads within the + * physical cores should be used. */ - if (node == -1) - node = dd->node; + if (affinity->num_core_siblings > 0) { + for (i = 0; i < affinity->num_core_siblings; i++) { + find_hw_thread_mask(i, hw_thread_mask, affinity); + + /* + * If there's at least one available core for this HW + * thread number, stop looking for a core. + * + * diff will always be not empty at least once in this + * loop as the used mask gets reset when + * (set->mask == set->used) before this loop. + */ + cpumask_andnot(diff, hw_thread_mask, &set->used); + if (!cpumask_empty(diff)) + break; + } + } + hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl", + cpumask_pr_args(hw_thread_mask)); + node_mask = cpumask_of_node(node); - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, + hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node, cpumask_pr_args(node_mask)); - /* diff will hold all unused cpus */ - cpumask_andnot(diff, &set->mask, &set->used); - hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); - - /* get cpumask of available CPUs on preferred NUMA */ - cpumask_and(mask, diff, node_mask); - hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); + /* Get cpumask of available CPUs on preferred NUMA */ + cpumask_and(available_mask, hw_thread_mask, node_mask); + cpumask_andnot(available_mask, available_mask, &set->used); + hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node, + cpumask_pr_args(available_mask)); /* * At first, we don't want to place processes on the same - * CPUs as interrupt handlers. + * CPUs as interrupt handlers. Then, CPUs running interrupt + * handlers are used. + * + * 1) If diff is not empty, then there are CPUs not running + * non-interrupt handlers available, so diff gets copied + * over to available_mask. + * 2) If diff is empty, then all CPUs not running interrupt + * handlers are taken, so available_mask contains all + * available CPUs running interrupt handlers. + * 3) If available_mask is empty, then all CPUs on the + * preferred NUMA node are taken, so other NUMA nodes are + * used for process assignments using the same method as + * the preferred NUMA node. */ - cpumask_andnot(diff, mask, intrs); + cpumask_andnot(diff, available_mask, intrs_mask); if (!cpumask_empty(diff)) - cpumask_copy(mask, diff); + cpumask_copy(available_mask, diff); - /* - * if we don't have a cpu on the preferred NUMA, get - * the list of the remaining available CPUs - */ - if (cpumask_empty(mask)) { - cpumask_andnot(diff, &set->mask, &set->used); - cpumask_andnot(mask, diff, node_mask); + /* If we don't have CPUs on the preferred node, use other NUMA nodes */ + if (cpumask_empty(available_mask)) { + cpumask_andnot(available_mask, hw_thread_mask, &set->used); + /* Excluding preferred NUMA cores */ + cpumask_andnot(available_mask, available_mask, node_mask); + hfi1_cdbg(PROC, + "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl", + cpumask_pr_args(available_mask)); + + /* + * At first, we don't want to place processes on the same + * CPUs as interrupt handlers. + */ + cpumask_andnot(diff, available_mask, intrs_mask); + if (!cpumask_empty(diff)) + cpumask_copy(available_mask, diff); } - hfi1_cdbg(PROC, "possible CPUs for process %*pbl", - cpumask_pr_args(mask)); + hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl", + cpumask_pr_args(available_mask)); - cpu = cpumask_first(mask); + cpu = cpumask_first(available_mask); if (cpu >= nr_cpu_ids) /* empty */ cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&dd->affinity->lock); - - free_cpumask_var(intrs); -free_mask: - free_cpumask_var(mask); + spin_unlock(&affinity->lock); + hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); + + free_cpumask_var(intrs_mask); +free_available_mask: + free_cpumask_var(available_mask); +free_hw_thread_mask: + free_cpumask_var(hw_thread_mask); free_diff: free_cpumask_var(diff); done: return cpu; } -void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu) +void hfi1_put_proc_affinity(int cpu) { - struct cpu_mask_set *set = &dd->affinity->proc; + struct hfi1_affinity_node_list *affinity = &node_affinity; + struct cpu_mask_set *set = &affinity->proc; if (cpu < 0) return; - spin_lock(&dd->affinity->lock); + spin_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); + hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&dd->affinity->lock); + spin_unlock(&affinity->lock); } +/* Prevents concurrent reads and writes of the sdma_affinity attrib */ +static DEFINE_MUTEX(sdma_affinity_mutex); + +int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, + size_t count) +{ + struct hfi1_affinity_node *entry; + struct cpumask mask; + int ret, i; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + if (!entry) + return -EINVAL; + + ret = cpulist_parse(buf, &mask); + if (ret) + return ret; + + if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + dd_dev_warn(dd, "Invalid CPU mask\n"); + return -EINVAL; + } + + mutex_lock(&sdma_affinity_mutex); + /* reset the SDMA interrupt affinity details */ + init_cpu_mask_set(&entry->def_intr); + cpumask_copy(&entry->def_intr.mask, &mask); + /* + * Reassign the affinity for each SDMA interrupt. + */ + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *msix; + + msix = &dd->msix_entries[i]; + if (msix->type != IRQ_SDMA) + continue; + + ret = hfi1_get_irq_affinity(dd, msix); + + if (ret) + break; + } + + mutex_unlock(&sdma_affinity_mutex); + return ret ? ret : strnlen(buf, PAGE_SIZE); +} + +int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) +{ + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + if (!entry) + return -EINVAL; + + mutex_lock(&sdma_affinity_mutex); + cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); + mutex_unlock(&sdma_affinity_mutex); + return strnlen(buf, PAGE_SIZE); +} diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 20f52fe74091..8879cf7a8cac 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -73,7 +73,6 @@ struct cpu_mask_set { struct hfi1_affinity { struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; - struct cpu_mask_set proc; struct cpumask real_cpu_mask; /* spin lock to protect affinity struct */ spinlock_t lock; @@ -82,11 +81,9 @@ struct hfi1_affinity { struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ -int init_real_cpu_mask(struct hfi1_devdata *); +void init_real_cpu_mask(void); /* Initialize driver affinity data */ -void hfi1_dev_affinity_init(struct hfi1_devdata *); -/* Free driver affinity data */ -void hfi1_dev_affinity_free(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. @@ -101,8 +98,35 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(struct hfi1_devdata *, int); +int hfi1_get_proc_affinity(int); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(struct hfi1_devdata *, int); +void hfi1_put_proc_affinity(int); + +int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); +int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, + size_t count); + +struct hfi1_affinity_node { + int node; + struct cpu_mask_set def_intr; + struct cpu_mask_set rcv_intr; + struct cpumask general_intr_mask; + struct list_head list; +}; + +struct hfi1_affinity_node_list { + struct list_head list; + struct cpumask real_cpu_mask; + struct cpu_mask_set proc; + int num_core_siblings; + int num_online_nodes; + int num_online_cpus; + /* protect affinity node list */ + spinlock_t lock; +}; + +int node_affinity_init(void); +void node_affinity_destroy(void); +extern struct hfi1_affinity_node_list node_affinity; #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index dad4d0ebbdff..b32638d58ae8 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -63,6 +63,7 @@ #include "efivar.h" #include "platform.h" #include "aspm.h" +#include "affinity.h" #define NUM_IB_PORTS 1 @@ -121,6 +122,7 @@ struct flag_table { #define SEC_SC_HALTED 0x4 /* per-context only */ #define SEC_SPC_FREEZE 0x8 /* per-HFI only */ +#define DEFAULT_KRCVQS 2 #define MIN_KERNEL_KCTXTS 2 #define FIRST_KERNEL_KCTXT 1 /* sizes for both the QP and RSM map tables */ @@ -238,6 +240,9 @@ struct flag_table { /* all CceStatus sub-block RXE pause bits */ #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK +#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL +#define CNTR_32BIT_MAX 0x00000000FFFFFFFF + /* * CCE Error flags. */ @@ -3947,6 +3952,28 @@ static u64 access_sdma_wrong_dw_err_cnt(const struct cntr_entry *entry, return dd->sw_send_dma_eng_err_status_cnt[0]; } +static u64 access_dc_rcv_err_cnt(const struct cntr_entry *entry, + void *context, int vl, int mode, + u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + u64 val = 0; + u64 csr = entry->csr; + + val = read_write_csr(dd, csr, mode, data); + if (mode == CNTR_MODE_R) { + val = val > CNTR_MAX - dd->sw_rcv_bypass_packet_errors ? + CNTR_MAX : val + dd->sw_rcv_bypass_packet_errors; + } else if (mode == CNTR_MODE_W) { + dd->sw_rcv_bypass_packet_errors = 0; + } else { + dd_dev_err(dd, "Invalid cntr register access mode"); + return 0; + } + return val; +} + #define def_access_sw_cpu(cntr) \ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ @@ -4020,7 +4047,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL), [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT, CNTR_SYNTH), -[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH), +[C_DC_RCV_ERR] = CNTR_ELEM("DcRecvErr", DCC_ERR_PORTRCV_ERR_CNT, 0, CNTR_SYNTH, + access_dc_rcv_err_cnt), [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT, CNTR_SYNTH), [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT, @@ -8798,30 +8826,6 @@ static int write_tx_settings(struct hfi1_devdata *dd, return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame); } -static void check_fabric_firmware_versions(struct hfi1_devdata *dd) -{ - u32 frame, version, prod_id; - int ret, lane; - - /* 4 lanes */ - for (lane = 0; lane < 4; lane++) { - ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame); - if (ret) { - dd_dev_err(dd, - "Unable to read lane %d firmware details\n", - lane); - continue; - } - version = (frame >> SPICO_ROM_VERSION_SHIFT) - & SPICO_ROM_VERSION_MASK; - prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT) - & SPICO_ROM_PROD_ID_MASK; - dd_dev_info(dd, - "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", - lane, version, prod_id); - } -} - /* * Read an idle LCB message. * @@ -9187,17 +9191,24 @@ static void wait_for_qsfp_init(struct hfi1_pportdata *ppd) unsigned long timeout; /* - * Check for QSFP interrupt for t_init (SFF 8679) + * Some QSFP cables have a quirk that asserts the IntN line as a side + * effect of power up on plug-in. We ignore this false positive + * interrupt until the module has finished powering up by waiting for + * a minimum timeout of the module inrush initialization time of + * 500 ms (SFF 8679 Table 5-6) to ensure the voltage rails in the + * module have stabilized. + */ + msleep(500); + + /* + * Check for QSFP interrupt for t_init (SFF 8679 Table 8-1) */ timeout = jiffies + msecs_to_jiffies(2000); while (1) { mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); - if (!(mask & QSFP_HFI0_INT_N)) { - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : - ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N); + if (!(mask & QSFP_HFI0_INT_N)) break; - } if (time_after(jiffies, timeout)) { dd_dev_info(dd, "%s: No IntN detected, reset complete\n", __func__); @@ -9213,10 +9224,17 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable) u64 mask; mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK); - if (enable) + if (enable) { + /* + * Clear the status register to avoid an immediate interrupt + * when we re-enable the IntN pin + */ + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR, + QSFP_HFI0_INT_N); mask |= (u64)QSFP_HFI0_INT_N; - else + } else { mask &= ~(u64)QSFP_HFI0_INT_N; + } write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); } @@ -9630,14 +9648,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, - struct hfi1_ctxt_info *kinfo) -{ - kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) | - HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U); - return 0; -} - struct hfi1_message_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr) { @@ -9890,6 +9900,131 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) return 0; } +static const char *state_completed_string(u32 completed) +{ + static const char * const state_completed[] = { + "EstablishComm", + "OptimizeEQ", + "VerifyCap" + }; + + if (completed < ARRAY_SIZE(state_completed)) + return state_completed[completed]; + + return "unknown"; +} + +static const char all_lanes_dead_timeout_expired[] = + "All lanes were inactive – was the interconnect media removed?"; +static const char tx_out_of_policy[] = + "Passing lanes on local port do not meet the local link width policy"; +static const char no_state_complete[] = + "State timeout occurred before link partner completed the state"; +static const char * const state_complete_reasons[] = { + [0x00] = "Reason unknown", + [0x01] = "Link was halted by driver, refer to LinkDownReason", + [0x02] = "Link partner reported failure", + [0x10] = "Unable to achieve frame sync on any lane", + [0x11] = + "Unable to find a common bit rate with the link partner", + [0x12] = + "Unable to achieve frame sync on sufficient lanes to meet the local link width policy", + [0x13] = + "Unable to identify preset equalization on sufficient lanes to meet the local link width policy", + [0x14] = no_state_complete, + [0x15] = + "State timeout occurred before link partner identified equalization presets", + [0x16] = + "Link partner completed the EstablishComm state, but the passing lanes do not meet the local link width policy", + [0x17] = tx_out_of_policy, + [0x20] = all_lanes_dead_timeout_expired, + [0x21] = + "Unable to achieve acceptable BER on sufficient lanes to meet the local link width policy", + [0x22] = no_state_complete, + [0x23] = + "Link partner completed the OptimizeEq state, but the passing lanes do not meet the local link width policy", + [0x24] = tx_out_of_policy, + [0x30] = all_lanes_dead_timeout_expired, + [0x31] = + "State timeout occurred waiting for host to process received frames", + [0x32] = no_state_complete, + [0x33] = + "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy", + [0x34] = tx_out_of_policy, +}; + +static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd, + u32 code) +{ + const char *str = NULL; + + if (code < ARRAY_SIZE(state_complete_reasons)) + str = state_complete_reasons[code]; + + if (str) + return str; + return "Reserved"; +} + +/* describe the given last state complete frame */ +static void decode_state_complete(struct hfi1_pportdata *ppd, u32 frame, + const char *prefix) +{ + struct hfi1_devdata *dd = ppd->dd; + u32 success; + u32 state; + u32 reason; + u32 lanes; + + /* + * Decode frame: + * [ 0: 0] - success + * [ 3: 1] - state + * [ 7: 4] - next state timeout + * [15: 8] - reason code + * [31:16] - lanes + */ + success = frame & 0x1; + state = (frame >> 1) & 0x7; + reason = (frame >> 8) & 0xff; + lanes = (frame >> 16) & 0xffff; + + dd_dev_err(dd, "Last %s LNI state complete frame 0x%08x:\n", + prefix, frame); + dd_dev_err(dd, " last reported state state: %s (0x%x)\n", + state_completed_string(state), state); + dd_dev_err(dd, " state successfully completed: %s\n", + success ? "yes" : "no"); + dd_dev_err(dd, " fail reason 0x%x: %s\n", + reason, state_complete_reason_code_string(ppd, reason)); + dd_dev_err(dd, " passing lane mask: 0x%x", lanes); +} + +/* + * Read the last state complete frames and explain them. This routine + * expects to be called if the link went down during link negotiation + * and initialization (LNI). That is, anywhere between polling and link up. + */ +static void check_lni_states(struct hfi1_pportdata *ppd) +{ + u32 last_local_state; + u32 last_remote_state; + + read_last_local_state(ppd->dd, &last_local_state); + read_last_remote_state(ppd->dd, &last_remote_state); + + /* + * Don't report anything if there is nothing to report. A value of + * 0 means the link was taken down while polling and there was no + * training in-process. + */ + if (last_local_state == 0 && last_remote_state == 0) + return; + + decode_state_complete(ppd, last_local_state, "transmitted"); + decode_state_complete(ppd, last_remote_state, "received"); +} + /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. @@ -9902,8 +10037,6 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) { struct hfi1_devdata *dd = ppd->dd; u32 pstate, previous_state; - u32 last_local_state; - u32 last_remote_state; int ret; int do_transition; int do_wait; @@ -10003,12 +10136,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) } else if (previous_state & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { /* went down while attempting link up */ - /* byte 1 of last_*_state is the failure reason */ - read_last_local_state(dd, &last_local_state); - read_last_remote_state(dd, &last_remote_state); - dd_dev_err(dd, - "LNI failure last states: local 0x%08x, remote 0x%08x\n", - last_local_state, last_remote_state); + check_lni_states(ppd); } /* the active link width (downgrade) is 0 on link down */ @@ -11668,9 +11796,6 @@ static void free_cntrs(struct hfi1_devdata *dd) dd->cntrnames = NULL; } -#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL -#define CNTR_32BIT_MAX 0x00000000FFFFFFFF - static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, u64 *psval, void *context, int vl) { @@ -12325,37 +12450,6 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) return ib_pstate; } -/* - * Read/modify/write ASIC_QSFP register bits as selected by mask - * data: 0 or 1 in the positions depending on what needs to be written - * dir: 0 for read, 1 for write - * mask: select by setting - * I2CCLK (bit 0) - * I2CDATA (bit 1) - */ -u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, - u32 mask) -{ - u64 qsfp_oe, target_oe; - - target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE; - if (mask) { - /* We are writing register bits, so lock access */ - dir &= mask; - data &= mask; - - qsfp_oe = read_csr(dd, target_oe); - qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir; - write_csr(dd, target_oe, qsfp_oe); - } - /* We are exclusively reading bits here, but it is unlikely - * we'll get valid data when we set the direction of the pin - * in the same call, so read should call this function again - * to get valid data - */ - return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); -} - #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) @@ -12780,7 +12874,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Kernel receive contexts: - * - min of 2 or 1 context/numa (excluding control context) * - Context 0 - control context (VL15/multicast/error) * - Context 1 - first kernel context * - Context 2 - second kernel context @@ -12794,9 +12887,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ num_kernel_contexts = n_krcvqs + 1; else - num_kernel_contexts = num_online_nodes() + 1; - num_kernel_contexts = - max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts); + num_kernel_contexts = DEFAULT_KRCVQS + 1; /* * Every kernel receive context needs an ACK send context. * one send context is allocated for each VL{0-7} and VL15 @@ -12815,7 +12906,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_user_contexts < 0) num_user_contexts = - cpumask_weight(&dd->affinity->real_cpu_mask); + cpumask_weight(&node_affinity.real_cpu_mask); total_contexts = num_kernel_contexts + num_user_contexts; @@ -14141,6 +14232,11 @@ static int init_asic_data(struct hfi1_devdata *dd) } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ spin_unlock_irqrestore(&hfi1_devs_lock, flags); + + /* first one through - set up i2c devices */ + if (!peer) + ret = set_up_i2c(dd, dd->asic_data); + return ret; } @@ -14445,19 +14541,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, (dd->revision >> CCE_REVISION_SW_SHIFT) & CCE_REVISION_SW_MASK); - /* - * The real cpu mask is part of the affinity struct but has to be - * initialized earlier than the rest of the affinity struct because it - * is needed to calculate the number of user contexts in - * set_up_context_variables(). However, hfi1_dev_affinity_init(), - * which initializes the rest of the affinity struct members, - * depends on set_up_context_variables() for the number of kernel - * contexts, so it cannot be called before set_up_context_variables(). - */ - ret = init_real_cpu_mask(dd); - if (ret) - goto bail_cleanup; - ret = set_up_context_variables(dd); if (ret) goto bail_cleanup; @@ -14471,7 +14554,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* set up KDETH QP prefix in both RX and TX CSRs */ init_kdeth_qp(dd); - hfi1_dev_affinity_init(dd); + ret = hfi1_dev_affinity_init(dd); + if (ret) + goto bail_cleanup; /* send contexts must be set up before receive contexts */ ret = init_send_contexts(dd); @@ -14508,8 +14593,14 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); + /* + * Serial number is created from the base guid: + * [27:24] = base guid [38:35] + * [23: 0] = base guid [23: 0] + */ snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n", - dd->base_guid & 0xFFFFFF); + (dd->base_guid & 0xFFFFFF) | + ((dd->base_guid >> 11) & 0xF000000)); dd->oui1 = dd->base_guid >> 56 & 0xFF; dd->oui2 = dd->base_guid >> 48 & 0xFF; @@ -14518,7 +14609,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, ret = load_firmware(dd); /* asymmetric with dispose_firmware() */ if (ret) goto bail_clear_intr; - check_fabric_firmware_versions(dd); thermal_init(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 66a327978739..ed11107c50fe 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -640,6 +640,7 @@ extern uint platform_config_load; /* SBus commands */ #define RESET_SBUS_RECEIVER 0x20 #define WRITE_SBUS_RECEIVER 0x21 +#define READ_SBUS_RECEIVER 0x22 void sbus_request(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, u8 command, u32 data_in); int sbus_request_slow(struct hfi1_devdata *dd, @@ -1336,10 +1337,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); struct hfi1_message_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); -int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, - struct hfi1_ctxt_info *kinfo); -u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, - u32 mask); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 8744de6667c2..5b9993899789 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -471,6 +471,10 @@ #define ASIC_STS_SBUS_RESULT (ASIC + 0x000000000010) #define ASIC_STS_SBUS_RESULT_DONE_SMASK 0x1ull #define ASIC_STS_SBUS_RESULT_RCV_DATA_VALID_SMASK 0x2ull +#define ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT 2 +#define ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK 0x7ull +#define ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT 32 +#define ASIC_STS_SBUS_RESULT_DATA_OUT_MASK 0xFFFFFFFFull #define ASIC_STS_THERM (ASIC + 0x000000000058) #define ASIC_STS_THERM_CRIT_TEMP_MASK 0x7FFull #define ASIC_STS_THERM_CRIT_TEMP_SHIFT 18 diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c75b0ae688f8..8246dc7d0573 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -392,9 +392,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u16 rlid; u8 svc_type, sl, sc5; - sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf; - if (rhf_dc_info(packet->rhf)) - sc5 |= 0x10; + sc5 = hdr2sc(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; @@ -450,14 +448,20 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rcv_flags = 0; } -static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, - struct hfi1_other_headers *ohdr, - u64 rhf, u32 bth1, struct ib_grh *grh) +void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - u32 rqpn = 0; - u16 rlid; - u8 sc5, svc_type; + struct hfi1_ib_header *hdr = pkt->hdr; + struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_grh *grh = NULL; + u32 rqpn = 0, bth1; + u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); + u8 sc, svc_type; + bool is_mcast = false; + + if (pkt->rcv_flags & HFI1_HAS_GRH) + grh = &hdr->u.l.grh; switch (qp->ibqp.qp_type) { case IB_QPT_SMI: @@ -466,6 +470,8 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, rlid = be16_to_cpu(hdr->lrh[3]); rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: rlid = qp->remote_ah_attr.dlid; @@ -481,24 +487,23 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, return; } - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - if (rhf_dc_info(rhf)) - sc5 |= 0x10; + sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); - if (bth1 & HFI1_FECN_SMASK) { + bth1 = be32_to_cpu(ohdr->bth[1]); + if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); - u16 dlid = be16_to_cpu(hdr->lrh[1]); - return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc5, grh); + return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } - if (bth1 & HFI1_BECN_SMASK) { + if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; - u8 sl = ibp->sc_to_sl[sc5]; + u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } + } struct ps_mdata { @@ -596,7 +601,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_qp *qp; struct hfi1_ib_header *hdr; struct hfi1_other_headers *ohdr; - struct ib_grh *grh = NULL; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,14 +620,13 @@ static void __prescan_rxq(struct hfi1_packet *packet) hfi1_get_msgheader(dd, rhf_addr); lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) { + if (lnh == HFI1_LRH_BTH) ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + else if (lnh == HFI1_LRH_GRH) ohdr = &hdr->u.l.oth; - grh = &hdr->u.l.grh; - } else { + else goto next; /* just in case */ - } + bth1 = be32_to_cpu(ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); @@ -639,7 +642,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, hdr, ohdr, rhf, bth1, grh); + process_ecn(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1362,6 +1365,7 @@ int process_receive_bypass(struct hfi1_packet *packet) dd_dev_err(packet->rcd->dd, "Bypass packets are not supported in normal operation. Dropping\n"); + incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); return RHF_RCV_CONTINUE; } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c702a009608f..1ecbec192358 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -168,6 +168,7 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_filedata *fd; struct hfi1_devdata *dd = container_of(inode->i_cdev, struct hfi1_devdata, user_cdev); @@ -176,10 +177,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) kobject_get(&dd->kobj); /* The real work is performed later in assign_ctxt() */ - fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); - if (fp->private_data) /* no cpu affinity by default */ - ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1; - return fp->private_data ? 0 : -ENOMEM; + + fd = kzalloc(sizeof(*fd), GFP_KERNEL); + + if (fd) { + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + } + + fp->private_data = fd; + + return fd ? 0 : -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -228,7 +236,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(struct hfi1_base_info)); break; case HFI1_IOCTL_CREDIT_UPD: - if (uctxt && uctxt->sc) + if (uctxt) sc_return_credits(uctxt->sc); break; @@ -392,41 +400,38 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) struct hfi1_filedata *fd = kiocb->ki_filp->private_data; struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; - int ret = 0, done = 0, reqs = 0; + int done = 0, reqs = 0; unsigned long dim = from->nr_segs; - if (!cq || !pq) { - ret = -EIO; - goto done; - } + if (!cq || !pq) + return -EIO; - if (!iter_is_iovec(from) || !dim) { - ret = -EINVAL; - goto done; - } + if (!iter_is_iovec(from) || !dim) + return -EINVAL; hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { - ret = -ENOSPC; - goto done; - } + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + return -ENOSPC; while (dim) { + int ret; unsigned long count = 0; ret = hfi1_user_sdma_process_request( kiocb->ki_filp, (struct iovec *)(from->iov + done), dim, &count); - if (ret) - goto done; + if (ret) { + reqs = ret; + break; + } dim -= count; done += count; reqs++; } -done: - return ret ? ret : reqs; + + return reqs; } static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) @@ -718,7 +723,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_user_sdma_free_queues(fdata); /* release the cpu */ - hfi1_put_proc_affinity(dd, fdata->rec_cpu_num); + hfi1_put_proc_affinity(fdata->rec_cpu_num); /* * Clear any left over, unhandled events so the next process that @@ -730,7 +735,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (--uctxt->cnt) { uctxt->active_slaves &= ~(1 << fdata->subctxt); - uctxt->subpid[fdata->subctxt] = 0; mutex_unlock(&hfi1_mutex); goto done; } @@ -756,7 +760,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); sc_disable(uctxt->sc); - uctxt->pid = 0; spin_unlock_irqrestore(&dd->uctxt_lock, flags); dd->rcd[uctxt->ctxt] = NULL; @@ -818,9 +821,10 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) ret = find_shared_ctxt(fp, uinfo); if (ret < 0) goto done_unlock; - if (ret) - fd->rec_cpu_num = hfi1_get_proc_affinity( - fd->uctxt->dd, fd->uctxt->numa_id); + if (ret) { + fd->rec_cpu_num = + hfi1_get_proc_affinity(fd->uctxt->numa_id); + } } /* @@ -895,7 +899,6 @@ static int find_shared_ctxt(struct file *fp, } fd->uctxt = uctxt; fd->subctxt = uctxt->cnt++; - uctxt->subpid[fd->subctxt] = current->pid; uctxt->active_slaves |= 1 << fd->subctxt; ret = 1; goto done; @@ -932,7 +935,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, if (ctxt == dd->num_rcv_contexts) return -EBUSY; - fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1); + /* + * If we don't have a NUMA node requested, preference is towards + * device NUMA node. + */ + fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node); if (fd->rec_cpu_num != -1) numa = cpu_to_node(fd->rec_cpu_num); else @@ -976,8 +983,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return ret; } uctxt->userversion = uinfo->userversion; - uctxt->pid = current->pid; - uctxt->flags = HFI1_CAP_UGET(MASK); + uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); @@ -1080,18 +1086,18 @@ static int user_init(struct file *fp) hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; /* * Ignore the bit in the flags for now until proper * support for multiple packet per rcv array entry is * added. */ - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR)) rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; /* * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. @@ -1099,7 +1105,7 @@ static int user_init(struct file *fp) * uses of the chip or ctxt. Therefore, add the rcvctrl op * for both cases. */ - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; @@ -1122,9 +1128,14 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) int ret = 0; memset(&cinfo, 0, sizeof(cinfo)); - ret = hfi1_get_base_kinfo(uctxt, &cinfo); - if (ret < 0) - goto done; + cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & + HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | + HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | + HFI1_CAP_KGET_MASK(uctxt->flags, K2U); + /* adjust flag if this fd is not able to cache */ + if (!fd->handler) + cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ + cinfo.num_active = hfi1_count_active_units(); cinfo.unit = uctxt->dd->unit; cinfo.ctxt = uctxt->ctxt; @@ -1146,7 +1157,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) ret = -EFAULT; -done: + return ret; } diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index ed680fda611d..13db8eb4f4ec 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -206,6 +206,9 @@ static const struct firmware *platform_config; /* the number of fabric SerDes on the SBus */ #define NUM_FABRIC_SERDES 4 +/* ASIC_STS_SBUS_RESULT.RESULT_CODE value */ +#define SBUS_READ_COMPLETE 0x4 + /* SBus fabric SerDes addresses, one set per HFI */ static const u8 fabric_serdes_addrs[2][NUM_FABRIC_SERDES] = { { 0x01, 0x02, 0x03, 0x04 }, @@ -240,6 +243,7 @@ static const u8 all_pcie_serdes_broadcast = 0xe0; static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, struct firmware_details *fdet); +static void dump_fw_version(struct hfi1_devdata *dd); /* * Read a single 64-bit value from 8051 data memory. @@ -1079,6 +1083,44 @@ void sbus_request(struct hfi1_devdata *dd, } /* + * Read a value from the SBus. + * + * Requires the caller to be in fast mode + */ +static u32 sbus_read(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, + u32 data_in) +{ + u64 reg; + int retries; + int success = 0; + u32 result = 0; + u32 result_code = 0; + + sbus_request(dd, receiver_addr, data_addr, READ_SBUS_RECEIVER, data_in); + + for (retries = 0; retries < 100; retries++) { + usleep_range(1000, 1200); /* arbitrary */ + reg = read_csr(dd, ASIC_STS_SBUS_RESULT); + result_code = (reg >> ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT) + & ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK; + if (result_code != SBUS_READ_COMPLETE) + continue; + + success = 1; + result = (reg >> ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT) + & ASIC_STS_SBUS_RESULT_DATA_OUT_MASK; + break; + } + + if (!success) { + dd_dev_err(dd, "%s: read failed, result code 0x%x\n", __func__, + result_code); + } + + return result; +} + +/* * Turn off the SBus and fabric serdes spicos. * * + Must be called with Sbus fast mode turned on. @@ -1636,6 +1678,7 @@ int load_firmware(struct hfi1_devdata *dd) return ret; } + dump_fw_version(dd); return 0; } @@ -2054,3 +2097,85 @@ void read_guid(struct hfi1_devdata *dd) dd_dev_info(dd, "GUID %llx", (unsigned long long)dd->base_guid); } + +/* read and display firmware version info */ +static void dump_fw_version(struct hfi1_devdata *dd) +{ + u32 pcie_vers[NUM_PCIE_SERDES]; + u32 fabric_vers[NUM_FABRIC_SERDES]; + u32 sbus_vers; + int i; + int all_same; + int ret; + u8 rcv_addr; + + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); + if (ret) { + dd_dev_err(dd, "Unable to acquire SBus to read firmware versions\n"); + return; + } + + /* set fast mode */ + set_sbus_fast_mode(dd); + + /* read version for SBus Master */ + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x02, WRITE_SBUS_RECEIVER, 0); + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x07, WRITE_SBUS_RECEIVER, 0x1); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + sbus_vers = sbus_read(dd, SBUS_MASTER_BROADCAST, 0x08, 0x1); + dd_dev_info(dd, "SBus Master firmware version 0x%08x\n", sbus_vers); + + /* read version for PCIe SerDes */ + all_same = 1; + pcie_vers[0] = 0; + for (i = 0; i < NUM_PCIE_SERDES; i++) { + rcv_addr = pcie_serdes_addrs[dd->hfi1_id][i]; + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + pcie_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); + if (i > 0 && pcie_vers[0] != pcie_vers[i]) + all_same = 0; + } + + if (all_same) { + dd_dev_info(dd, "PCIe SerDes firmware version 0x%x\n", + pcie_vers[0]); + } else { + dd_dev_warn(dd, "PCIe SerDes do not have the same firmware version\n"); + for (i = 0; i < NUM_PCIE_SERDES; i++) { + dd_dev_info(dd, + "PCIe SerDes lane %d firmware version 0x%x\n", + i, pcie_vers[i]); + } + } + + /* read version for fabric SerDes */ + all_same = 1; + fabric_vers[0] = 0; + for (i = 0; i < NUM_FABRIC_SERDES; i++) { + rcv_addr = fabric_serdes_addrs[dd->hfi1_id][i]; + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + fabric_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); + if (i > 0 && fabric_vers[0] != fabric_vers[i]) + all_same = 0; + } + + if (all_same) { + dd_dev_info(dd, "Fabric SerDes firmware version 0x%x\n", + fabric_vers[0]); + } else { + dd_dev_warn(dd, "Fabric SerDes do not have the same firmware version\n"); + for (i = 0; i < NUM_FABRIC_SERDES; i++) { + dd_dev_info(dd, + "Fabric SerDes lane %d firmware version 0x%x\n", + i, fabric_vers[i]); + } + } + + clear_sbus_fast_mode(dd); + release_chip_resource(dd, CR_SBUS); +} diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4417a0fd3ef9..1000e0fd96d9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -62,6 +62,8 @@ #include <linux/cdev.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/i2c.h> +#include <linux/i2c-algo-bit.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -253,7 +255,7 @@ struct hfi1_ctxtdata { /* chip offset of PIO buffers for this ctxt */ u32 piobufs; /* per-context configuration flags */ - u32 flags; + unsigned long flags; /* per-context event flags for fileops/intr communication */ unsigned long event_flags; /* WAIT_RCV that timed out, no interrupt */ @@ -268,9 +270,6 @@ struct hfi1_ctxtdata { u32 urgent; /* saved total number of polled urgent packets for poll edge trigger */ u32 urgent_poll; - /* pid of process using this ctxt */ - pid_t pid; - pid_t subpid[HFI1_MAX_SHARED_CTXTS]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; /* so file ops can get at unit */ @@ -366,11 +365,6 @@ struct hfi1_packet { u8 etype; }; -static inline bool has_sc4_bit(struct hfi1_packet *p) -{ - return !!rhf_dc_info(p->rhf); -} - /* * Private data for snoop/capture support. */ @@ -805,10 +799,19 @@ struct hfi1_temp { u8 triggers; /* temperature triggers */ }; +struct hfi1_i2c_bus { + struct hfi1_devdata *controlling_dd; /* current controlling device */ + struct i2c_adapter adapter; /* bus details */ + struct i2c_algo_bit_data algo; /* bus algorithm details */ + int num; /* bus number, 0 or 1 */ +}; + /* common data between shared ASIC HFIs */ struct hfi1_asic_data { struct hfi1_devdata *dds[2]; /* back pointers */ struct mutex asic_resource_mutex; + struct hfi1_i2c_bus *i2c_bus0; + struct hfi1_i2c_bus *i2c_bus1; }; /* device data struct now contains only "general per-device" info. @@ -1128,7 +1131,8 @@ struct hfi1_devdata { NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS]; /* Software counter that aggregates all cce_err_status errors */ u64 sw_cce_err_status_aggregate; - + /* Software counter that aggregates all bypass packet rcv errors */ + u64 sw_rcv_bypass_packet_errors; /* receive interrupt functions */ rhf_rcv_function_ptr *rhf_rcv_function_map; rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; @@ -1174,6 +1178,8 @@ struct hfi1_devdata { /* 8051 firmware version helper */ #define dc8051_ver(a, b) ((a) << 8 | (b)) +#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) +#define dc8051_ver_min(a) (a & 0x00ff) /* f_put_tid types */ #define PT_EXPECTED 0 @@ -1182,6 +1188,7 @@ struct hfi1_devdata { struct tid_rb_node; struct mmu_rb_node; +struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { @@ -1192,7 +1199,7 @@ struct hfi1_filedata { /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; - struct rb_root tid_rb_root; + struct mmu_rb_handler *handler; struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; @@ -1201,6 +1208,7 @@ struct hfi1_filedata { u32 invalid_tid_idx; /* protect invalid_tids array and invalid_tid_idx */ spinlock_t invalid_lock; + struct mm_struct *mm; }; extern struct list_head hfi1_dev_list; @@ -1234,6 +1242,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); void set_all_slowpath(struct hfi1_devdata *dd); +extern const struct pci_device_id hfi1_pci_tbl[]; + /* receive packet handler dispositions */ #define RCV_PKT_OK 0x0 /* keep going */ #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ @@ -1259,7 +1269,7 @@ void receive_interrupt_work(struct work_struct *work); static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | - ((!!(rhf & RHF_DC_INFO_SMASK)) << 4); + ((!!(rhf_dc_info(rhf))) << 4); } static inline u16 generate_jkey(kuid_t uid) @@ -1569,6 +1579,22 @@ static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port) return &dd->pport[pidx].ibport_data; } +void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp) +{ + struct hfi1_other_headers *ohdr = pkt->ohdr; + u32 bth1; + + bth1 = be32_to_cpu(ohdr->bth[1]); + if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { + hfi1_process_ecn_slowpath(qp, pkt, do_cnp); + return bth1 & HFI1_FECN_SMASK; + } + return false; +} + /* * Return the indexed PKEY from the port PKEY table. */ @@ -1586,8 +1612,7 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) } /* - * Readers of cc_state must call get_cc_state() under rcu_read_lock(). - * Writers of cc_state must call get_cc_state() under cc_state_lock. + * Called by readers of cc_state only, must call under rcu_read_lock(). */ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) { @@ -1595,6 +1620,16 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) } /* + * Called by writers of cc_state only, must call under cc_state_lock. + */ +static inline +struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) +{ + return rcu_dereference_protected(ppd->cc_state, + lockdep_is_held(&ppd->cc_state_lock)); +} + +/* * values for dd->flags (_device_ related flags) */ #define HFI1_INITTED 0x1 /* chip and driver up and initted */ @@ -1669,9 +1704,12 @@ void shutdown_led_override(struct hfi1_pportdata *ppd); */ #define DEFAULT_RCVHDR_ENTSIZE 32 -bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32); -int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); -void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool); +bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, + u32 nlocked, u32 npages); +int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, + size_t npages, bool writable, struct page **pages); +void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, + size_t npages, bool dirty); static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { @@ -1947,4 +1985,55 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); +#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) +#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index eed971ccd2a1..a358d23ecd54 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -64,6 +64,7 @@ #include "debugfs.h" #include "verbs.h" #include "aspm.h" +#include "affinity.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -474,8 +475,9 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port) { - int i, size; + int i; uint default_pkey_idx; + struct cc_state *cc_state; ppd->dd = dd; ppd->hw_pidx = hw_pidx; @@ -526,9 +528,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, spin_lock_init(&ppd->cc_state_lock); spin_lock_init(&ppd->cc_log_lock); - size = sizeof(struct cc_state); - RCU_INIT_POINTER(ppd->cc_state, kzalloc(size, GFP_KERNEL)); - if (!rcu_dereference(ppd->cc_state)) + cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL); + RCU_INIT_POINTER(ppd->cc_state, cc_state); + if (!cc_state) goto bail; return; @@ -972,39 +974,49 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, - * free the structure. Must be holding hfi1_devs_lock. + * return the structure to be finalized outside the lock. Must be + * holding hfi1_devs_lock. */ -static void release_asic_data(struct hfi1_devdata *dd) +static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { + struct hfi1_asic_data *ad; int other; if (!dd->asic_data) - return; + return NULL; dd->asic_data->dds[dd->hfi1_id] = NULL; other = dd->hfi1_id ? 0 : 1; - if (!dd->asic_data->dds[other]) { - /* we are the last holder, free it */ - kfree(dd->asic_data); - } + ad = dd->asic_data; dd->asic_data = NULL; + /* return NULL if the other dd still has a link */ + return ad->dds[other] ? NULL : ad; +} + +static void finalize_asic_data(struct hfi1_devdata *dd, + struct hfi1_asic_data *ad) +{ + clean_up_i2c(dd, ad); + kfree(ad); } static void __hfi1_free_devdata(struct kobject *kobj) { struct hfi1_devdata *dd = container_of(kobj, struct hfi1_devdata, kobj); + struct hfi1_asic_data *ad; unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); idr_remove(&hfi1_unit_table, dd->unit); list_del(&dd->list); - release_asic_data(dd); + ad = release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); + if (ad) + finalize_asic_data(dd, ad); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); - hfi1_dev_affinity_free(dd); free_percpu(dd->send_schedule); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1162,7 +1174,7 @@ static int init_one(struct pci_dev *, const struct pci_device_id *); #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " -static const struct pci_device_id hfi1_pci_tbl[] = { +const struct pci_device_id hfi1_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) }, { 0, } @@ -1198,6 +1210,10 @@ static int __init hfi1_mod_init(void) if (ret) goto bail; + ret = node_affinity_init(); + if (ret) + goto bail; + /* validate max MTU before any devices start */ if (!valid_opa_max_mtu(hfi1_max_mtu)) { pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n", @@ -1278,6 +1294,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); + node_affinity_destroy(); hfi1_wss_exit(); hfi1_dbg_exit(); hfi1_cpulist_count = 0; @@ -1311,7 +1328,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) hrtimer_cancel(&ppd->cca_timer[i].hrtimer); spin_lock(&ppd->cc_state_lock); - cc_state = get_cc_state(ppd); + cc_state = get_cc_state_protected(ppd); RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); @@ -1760,8 +1777,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) hfi1_cdbg(PROC, "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", - rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size, - rcd->egrbufs.size); + rcd->ctxt, rcd->egrbufs.alloced, + rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); /* * Set the contexts rcv array head update threshold to the closest diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index fca07a1d6c28..1263abe01999 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -588,7 +588,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_phys_conf = (ppd->port_type & 0xf); -#if PI_LED_ENABLE_SUP pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; pi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; @@ -602,11 +601,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6; pi->port_states.ledenable_offlinereason |= ppd->offline_disabled_reason; -#else - pi->port_states.offline_reason = ppd->neighbor_normal << 4; - pi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - pi->port_states.offline_reason |= ppd->offline_disabled_reason; -#endif /* PI_LED_ENABLE_SUP */ pi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | state; @@ -1752,17 +1746,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (start_of_sm_config && (lstate == IB_PORT_INIT)) ppd->is_sm_config_started = 1; -#if PI_LED_ENABLE_SUP psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; psi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; psi->port_states.ledenable_offlinereason |= ppd->offline_disabled_reason; -#else - psi->port_states.offline_reason = ppd->neighbor_normal << 4; - psi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - psi->port_states.offline_reason |= ppd->offline_disabled_reason; -#endif /* PI_LED_ENABLE_SUP */ psi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); @@ -2430,14 +2418,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, rsp->port_rcv_remote_physical_errors = cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL)); - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - if (tmp2 < tmp) { - /* overflow/wrapped */ - rsp->local_link_integrity_errors = cpu_to_be64(~0); - } else { - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); - } + rsp->local_link_integrity_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL)); tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); @@ -2499,6 +2482,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl))); + rsp->vls[vfi].port_vl_xmit_discards = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl))); vlinfo++; vfi++; } @@ -2529,9 +2515,8 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL); /* local link integrity must be right-shifted by the lli resolution */ - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - error_counter_summary += (tmp >> res_lli); + error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL) >> res_lli); /* link error recovery must b right-shifted by the ler resolution */ tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); @@ -2800,14 +2785,9 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev, rsp->port_rcv_constraint_errors = cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL)); - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - if (tmp2 < tmp) { - /* overflow/wrapped */ - rsp->local_link_integrity_errors = cpu_to_be64(~0); - } else { - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); - } + rsp->local_link_integrity_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL)); rsp->excessive_buffer_overruns = cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL)); } @@ -2883,14 +2863,17 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; - + rsp->port_rcv_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL)); vlinfo = &rsp->vls[0]; vfi = 0; vl_select_mask = be32_to_cpu(req->vl_select_mask); for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), 8 * sizeof(req->vl_select_mask)) { memset(vlinfo, 0, sizeof(*vlinfo)); - /* vlinfo->vls[vfi].port_vl_xmit_discards ??? */ + rsp->vls[vfi].port_vl_xmit_discards = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl))); vlinfo += 1; vfi++; } @@ -3162,10 +3145,8 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS) write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0); - if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) { - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); + if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); - } if (counter_select & CS_LINK_ERROR_RECOVERY) { write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); @@ -3223,7 +3204,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, /* if (counter_select & CS_PORT_MARK_FECN) * write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0); */ - /* port_vl_xmit_discards ??? */ + if (counter_select & C_SW_XMIT_DSCD_VL) + write_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl), 0); } if (resp_len) @@ -3392,7 +3375,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) */ spin_lock(&ppd->cc_state_lock); - old_cc_state = get_cc_state(ppd); + old_cc_state = get_cc_state_protected(ppd); if (!old_cc_state) { /* never active, or shutting down */ spin_unlock(&ppd->cc_state_lock); @@ -3960,7 +3943,6 @@ void clear_linkup_counters(struct hfi1_devdata *dd) write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0); /* LocalLinkIntegrityErrors */ - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); /* ExcessiveBufferOverruns */ write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 8b734aaae88a..5aa3fd1be653 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -48,15 +48,8 @@ #define _HFI1_MAD_H #include <rdma/ib_pma.h> -#define USE_PI_LED_ENABLE 1 /* - * use led enabled bit in struct - * opa_port_states, if available - */ #include <rdma/opa_smi.h> #include <rdma/opa_port_info.h> -#ifndef PI_LED_ENABLE_SUP -#define PI_LED_ENABLE_SUP 0 -#endif #include "opa_compat.h" /* diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index b7a80aa1ae30..7ad30898fc19 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -53,19 +53,20 @@ #include "trace.h" struct mmu_rb_handler { - struct list_head list; struct mmu_notifier mn; - struct rb_root *root; + struct rb_root root; + void *ops_arg; spinlock_t lock; /* protect the RB tree */ struct mmu_rb_ops *ops; + struct mm_struct *mm; + struct list_head lru_list; + struct work_struct del_work; + struct list_head del_list; + struct workqueue_struct *wq; }; -static LIST_HEAD(mmu_rb_handlers); -static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */ - static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); -static struct mmu_rb_handler *find_mmu_handler(struct rb_root *); static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, unsigned long); static inline void mmu_notifier_range_start(struct mmu_notifier *, @@ -76,6 +77,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *, unsigned long, unsigned long); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); +static void do_remove(struct mmu_rb_handler *handler, + struct list_head *del_list); +static void handle_remove(struct work_struct *work); static struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, @@ -95,73 +99,79 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) return PAGE_ALIGN(node->addr + node->len) - 1; } -int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) +int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler) { struct mmu_rb_handler *handlr; - - if (!ops->invalidate) - return -EINVAL; + int ret; handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); if (!handlr) return -ENOMEM; - handlr->root = root; + handlr->root = RB_ROOT; handlr->ops = ops; + handlr->ops_arg = ops_arg; INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock(&mmu_rb_lock); - list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); - spin_unlock(&mmu_rb_lock); + handlr->mm = mm; + INIT_WORK(&handlr->del_work, handle_remove); + INIT_LIST_HEAD(&handlr->del_list); + INIT_LIST_HEAD(&handlr->lru_list); + handlr->wq = wq; + + ret = mmu_notifier_register(&handlr->mn, handlr->mm); + if (ret) { + kfree(handlr); + return ret; + } - return mmu_notifier_register(&handlr->mn, current->mm); + *handler = handlr; + return 0; } -void hfi1_mmu_rb_unregister(struct rb_root *root) +void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) { - struct mmu_rb_handler *handler = find_mmu_handler(root); + struct mmu_rb_node *rbnode; + struct rb_node *node; unsigned long flags; - - if (!handler) - return; + struct list_head del_list; /* Unregister first so we don't get any more notifications. */ - if (current->mm) - mmu_notifier_unregister(&handler->mn, current->mm); + mmu_notifier_unregister(&handler->mn, handler->mm); - spin_lock(&mmu_rb_lock); - list_del_rcu(&handler->list); - spin_unlock(&mmu_rb_lock); - synchronize_rcu(); + /* + * Make sure the wq delete handler is finished running. It will not + * be triggered once the mmu notifiers are unregistered above. + */ + flush_work(&handler->del_work); + + INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - if (!RB_EMPTY_ROOT(root)) { - struct rb_node *node; - struct mmu_rb_node *rbnode; - - while ((node = rb_first(root))) { - rbnode = rb_entry(node, struct mmu_rb_node, node); - rb_erase(node, root); - if (handler->ops->remove) - handler->ops->remove(root, rbnode, NULL); - } + while ((node = rb_first(&handler->root))) { + rbnode = rb_entry(node, struct mmu_rb_node, node); + rb_erase(node, &handler->root); + /* move from LRU list to delete list */ + list_move(&rbnode->list, &del_list); } spin_unlock_irqrestore(&handler->lock, flags); + do_remove(handler, &del_list); + kfree(handler); } -int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode) { - struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *node; unsigned long flags; int ret = 0; - if (!handler) - return -EINVAL; - spin_lock_irqsave(&handler->lock, flags); hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, mnode->len); @@ -170,12 +180,13 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) ret = -EINVAL; goto unlock; } - __mmu_int_rb_insert(mnode, root); + __mmu_int_rb_insert(mnode, &handler->root); + list_add(&mnode->list, &handler->lru_list); - if (handler->ops->insert) { - ret = handler->ops->insert(root, mnode); - if (ret) - __mmu_int_rb_remove(mnode, root); + ret = handler->ops->insert(handler->ops_arg, mnode); + if (ret) { + __mmu_int_rb_remove(mnode, &handler->root); + list_del(&mnode->list); /* remove from LRU list */ } unlock: spin_unlock_irqrestore(&handler->lock, flags); @@ -191,10 +202,10 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); if (!handler->ops->filter) { - node = __mmu_int_rb_iter_first(handler->root, addr, + node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); } else { - for (node = __mmu_int_rb_iter_first(handler->root, addr, + for (node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); node; node = __mmu_int_rb_iter_next(node, addr, @@ -206,82 +217,72 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -/* Caller must *not* hold handler lock. */ -static void __mmu_rb_remove(struct mmu_rb_handler *handler, - struct mmu_rb_node *node, struct mm_struct *mm) -{ - unsigned long flags; - - /* Validity of handler and node pointers has been checked by caller. */ - hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, - node->len); - spin_lock_irqsave(&handler->lock, flags); - __mmu_int_rb_remove(node, handler->root); - spin_unlock_irqrestore(&handler->lock, flags); - - if (handler->ops->remove) - handler->ops->remove(handler->root, node, mm); -} - -struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, - unsigned long len) +struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len) { - struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *node; unsigned long flags; - if (!handler) - return ERR_PTR(-EINVAL); - spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); + if (node) { + __mmu_int_rb_remove(node, &handler->root); + list_del(&node->list); /* remove from LRU list */ + } spin_unlock_irqrestore(&handler->lock, flags); return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root, - unsigned long addr, unsigned long len) +void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) { - struct mmu_rb_handler *handler = find_mmu_handler(root); - struct mmu_rb_node *node; + struct mmu_rb_node *rbnode, *ptr; + struct list_head del_list; unsigned long flags; + bool stop = false; - if (!handler) - return ERR_PTR(-EINVAL); + INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - node = __mmu_rb_search(handler, addr, len); - if (node) - __mmu_int_rb_remove(node, handler->root); + list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list, + list) { + if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, + &stop)) { + __mmu_int_rb_remove(rbnode, &handler->root); + /* move from LRU list to delete list */ + list_move(&rbnode->list, &del_list); + } + if (stop) + break; + } spin_unlock_irqrestore(&handler->lock, flags); - return node; + while (!list_empty(&del_list)) { + rbnode = list_first_entry(&del_list, struct mmu_rb_node, list); + list_del(&rbnode->list); + handler->ops->remove(handler->ops_arg, rbnode); + } } -void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) +/* + * It is up to the caller to ensure that this function does not race with the + * mmu invalidate notifier which may be calling the users remove callback on + * 'node'. + */ +void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, + struct mmu_rb_node *node) { - struct mmu_rb_handler *handler = find_mmu_handler(root); - - if (!handler || !node) - return; - - __mmu_rb_remove(handler, node, NULL); -} + unsigned long flags; -static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) -{ - struct mmu_rb_handler *handler; + /* Validity of handler and node pointers has been checked by caller. */ + hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, + node->len); + spin_lock_irqsave(&handler->lock, flags); + __mmu_int_rb_remove(node, &handler->root); + list_del(&node->list); /* remove from LRU list */ + spin_unlock_irqrestore(&handler->lock, flags); - rcu_read_lock(); - list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { - if (handler->root == root) - goto unlock; - } - handler = NULL; -unlock: - rcu_read_unlock(); - return handler; + handler->ops->remove(handler->ops_arg, node); } static inline void mmu_notifier_page(struct mmu_notifier *mn, @@ -304,9 +305,10 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); - struct rb_root *root = handler->root; + struct rb_root *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; + bool added = false; spin_lock_irqsave(&handler->lock, flags); for (node = __mmu_int_rb_iter_first(root, start, end - 1); @@ -315,11 +317,53 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, ptr = __mmu_int_rb_iter_next(node, start, end - 1); hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", node->addr, node->len); - if (handler->ops->invalidate(root, node)) { + if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); - if (handler->ops->remove) - handler->ops->remove(root, node, mm); + /* move from LRU list to delete list */ + list_move(&node->list, &handler->del_list); + added = true; } } spin_unlock_irqrestore(&handler->lock, flags); + + if (added) + queue_work(handler->wq, &handler->del_work); +} + +/* + * Call the remove function for the given handler and the list. This + * is expected to be called with a delete list extracted from handler. + * The caller should not be holding the handler lock. + */ +static void do_remove(struct mmu_rb_handler *handler, + struct list_head *del_list) +{ + struct mmu_rb_node *node; + + while (!list_empty(del_list)) { + node = list_first_entry(del_list, struct mmu_rb_node, list); + list_del(&node->list); + handler->ops->remove(handler->ops_arg, node); + } +} + +/* + * Work queue function to remove all nodes that have been queued up to + * be removed. The key feature is that mm->mmap_sem is not being held + * and the remove callback can sleep while taking it, if needed. + */ +static void handle_remove(struct work_struct *work) +{ + struct mmu_rb_handler *handler = container_of(work, + struct mmu_rb_handler, + del_work); + struct list_head del_list; + unsigned long flags; + + /* remove anything that is queued to get removed */ + spin_lock_irqsave(&handler->lock, flags); + list_replace_init(&handler->del_list, &del_list); + spin_unlock_irqrestore(&handler->lock, flags); + + do_remove(handler, &del_list); } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 7a57b9c49d27..754f6ebf13fb 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -54,23 +54,34 @@ struct mmu_rb_node { unsigned long len; unsigned long __last; struct rb_node node; + struct list_head list; }; +/* + * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is + * allowed to sleep. + */ struct mmu_rb_ops { - bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); - int (*insert)(struct rb_root *, struct mmu_rb_node *); - void (*remove)(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); - int (*invalidate)(struct rb_root *, struct mmu_rb_node *); + bool (*filter)(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); + int (*insert)(void *ops_arg, struct mmu_rb_node *mnode); + void (*remove)(void *ops_arg, struct mmu_rb_node *mnode); + int (*invalidate)(void *ops_arg, struct mmu_rb_node *node); + int (*evict)(void *ops_arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop); }; -int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops); -void hfi1_mmu_rb_unregister(struct rb_root *); -int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); -struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, - unsigned long); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long, - unsigned long); +int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler); +void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler); +int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode); +void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); +void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode); +struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0bac21e6a658..89c68da1c273 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -679,6 +679,10 @@ static uint pcie_pset = UNSET_PSET; module_param(pcie_pset, uint, S_IRUGO); MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10"); +static uint pcie_ctle = 1; /* discrete on, integrated off */ +module_param(pcie_ctle, uint, S_IRUGO); +MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off"); + /* equalization columns */ #define PREC 0 #define ATTN 1 @@ -716,6 +720,36 @@ static const u8 integrated_preliminary_eq[11][3] = { { 0x00, 0x1e, 0x0a }, /* p10 */ }; +static const u8 discrete_ctle_tunings[11][4] = { + /* DC LF HF BW */ + { 0x48, 0x0b, 0x04, 0x04 }, /* p0 */ + { 0x60, 0x05, 0x0f, 0x0a }, /* p1 */ + { 0x50, 0x09, 0x06, 0x06 }, /* p2 */ + { 0x68, 0x05, 0x0f, 0x0a }, /* p3 */ + { 0x80, 0x05, 0x0f, 0x0a }, /* p4 */ + { 0x70, 0x05, 0x0f, 0x0a }, /* p5 */ + { 0x68, 0x05, 0x0f, 0x0a }, /* p6 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ + { 0x48, 0x09, 0x06, 0x06 }, /* p8 */ + { 0x60, 0x05, 0x0f, 0x0a }, /* p9 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p10 */ +}; + +static const u8 integrated_ctle_tunings[11][4] = { + /* DC LF HF BW */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p0 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p1 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p2 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p3 */ + { 0x58, 0x0a, 0x05, 0x05 }, /* p4 */ + { 0x48, 0x0a, 0x05, 0x05 }, /* p5 */ + { 0x40, 0x0a, 0x05, 0x05 }, /* p6 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p8 */ + { 0x38, 0x09, 0x06, 0x06 }, /* p9 */ + { 0x38, 0x0e, 0x01, 0x01 }, /* p10 */ +}; + /* helper to format the value to write to hardware */ #define eq_value(pre, curr, post) \ ((((u32)(pre)) << \ @@ -951,11 +985,14 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) u32 status, err; int ret; int do_retry, retry_count = 0; + int intnum = 0; uint default_pset; u16 target_vector, target_speed; u16 lnkctl2, vendor; u8 div; const u8 (*eq)[3]; + const u8 (*ctle_tunings)[4]; + uint static_ctle_mode; int return_error = 0; /* PCIe Gen3 is for the ASIC only */ @@ -1089,6 +1126,9 @@ retry: div = 3; eq = discrete_preliminary_eq; default_pset = DEFAULT_DISCRETE_PSET; + ctle_tunings = discrete_ctle_tunings; + /* bit 0 - discrete on/off */ + static_ctle_mode = pcie_ctle & 0x1; } else { /* 400mV, FS=29, LF = 9 */ fs = 29; @@ -1096,6 +1136,9 @@ retry: div = 1; eq = integrated_preliminary_eq; default_pset = DEFAULT_MCP_PSET; + ctle_tunings = integrated_ctle_tunings; + /* bit 1 - integrated on/off */ + static_ctle_mode = (pcie_ctle >> 1) & 0x1; } pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101, (fs << @@ -1135,16 +1178,33 @@ retry: * step 5c: Program gasket interrupts */ /* set the Rx Bit Rate to REFCLK ratio */ - write_gasket_interrupt(dd, 0, 0x0006, 0x0050); + write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050); /* disable pCal for PCIe Gen3 RX equalization */ - write_gasket_interrupt(dd, 1, 0x0026, 0x5b01); + /* select adaptive or static CTLE */ + write_gasket_interrupt(dd, intnum++, 0x0026, + 0x5b01 | (static_ctle_mode << 3)); /* * Enable iCal for PCIe Gen3 RX equalization, and set which * evaluation of RX_EQ_EVAL will launch the iCal procedure. */ - write_gasket_interrupt(dd, 2, 0x0026, 0x5202); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202); + + if (static_ctle_mode) { + /* apply static CTLE tunings */ + u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw; + + pcie_dc = ctle_tunings[pcie_pset][0]; + pcie_lf = ctle_tunings[pcie_pset][1]; + pcie_hf = ctle_tunings[pcie_pset][2]; + pcie_bw = ctle_tunings[pcie_pset][3]; + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw); + } + /* terminate list */ - write_gasket_interrupt(dd, 3, 0x0000, 0x0000); + write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000); /* * step 5d: program XMT margin diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d4022450b73f..ac1bf4a73571 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1952,13 +1952,17 @@ int init_pervl_scs(struct hfi1_devdata *dd) dd->vld[15].sc = sc_alloc(dd, SC_VL15, dd->rcd[0]->rcvhdrqentsize, dd->node); if (!dd->vld[15].sc) - goto nomem; + return -ENOMEM; + hfi1_init_ctxt(dd->vld[15].sc); dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); - dd->kernel_send_context = kmalloc_node(dd->num_send_contexts * + dd->kernel_send_context = kzalloc_node(dd->num_send_contexts * sizeof(struct send_context *), GFP_KERNEL, dd->node); + if (!dd->kernel_send_context) + goto freesc15; + dd->kernel_send_context[0] = dd->vld[15].sc; for (i = 0; i < num_vls; i++) { @@ -2010,12 +2014,21 @@ int init_pervl_scs(struct hfi1_devdata *dd) if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) goto nomem; return 0; + nomem: - sc_free(dd->vld[15].sc); - for (i = 0; i < num_vls; i++) + for (i = 0; i < num_vls; i++) { sc_free(dd->vld[i].sc); + dd->vld[i].sc = NULL; + } + for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) sc_free(dd->kernel_send_context[i + 1]); + + kfree(dd->kernel_send_context); + dd->kernel_send_context = NULL; + +freesc15: + sc_free(dd->vld[15].sc); return -ENOMEM; } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 03df9322f862..965c8aef0c60 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -537,20 +537,6 @@ static void apply_tunings( u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0; u8 *cache = ppd->qsfp_info.cache; - /* Enable external device config if channel is limiting active */ - read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, - GENERAL_CONFIG, &config_data); - config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); - config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); - ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, - GENERAL_CONFIG, config_data); - if (ret != HCMD_SUCCESS) - dd_dev_err( - ppd->dd, - "%s: Failed to set enable external device config\n", - __func__); - - config_data = 0; /* re-init */ /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); @@ -638,9 +624,13 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, if (ret) return ret; + /* + * We'll change the QSFP memory contents from here on out, thus we set a + * flag here to remind ourselves to reset the QSFP module. This prevents + * reuse of stale settings established in our previous pass through. + */ if (ppd->qsfp_info.reset_needed) { reset_qsfp(ppd); - ppd->qsfp_info.reset_needed = 0; refresh_qsfp_cache(ppd, &ppd->qsfp_info); } else { ppd->qsfp_info.reset_needed = 1; diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a942ffba4cb..a5aa3517e7d5 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -52,6 +52,7 @@ #include <linux/seq_file.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> +#include <rdma/ib_verbs.h> #include "hfi.h" #include "qp.h" @@ -115,6 +116,66 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; +const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { +[IB_WR_RDMA_WRITE] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_RDMA_READ] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC, +}, + +[IB_WR_ATOMIC_CMP_AND_SWP] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_ATOMIC_FETCH_AND_ADD] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_RDMA_WRITE_WITH_IMM] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND_WITH_IMM] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_REG_MR] = { + .length = sizeof(struct ib_reg_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), + .flags = RVT_OPERATION_LOCAL, +}, + +[IB_WR_LOCAL_INV] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), + .flags = RVT_OPERATION_LOCAL, +}, + +[IB_WR_SEND_WITH_INV] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_RC), +}, + +}; + static void flush_tx_list(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -745,8 +806,9 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, priv->owner = qp; - priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node); - if (!priv->s_hdr) { + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, + rdi->dparms.node); + if (!priv->s_ahg) { kfree(priv); return ERR_PTR(-ENOMEM); } @@ -759,7 +821,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - kfree(priv->s_hdr); + kfree(priv->s_ahg); kfree(priv); } diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index e7bc8d6cf681..587d84d65bb8 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -54,6 +54,8 @@ extern unsigned int hfi1_qp_table_size; +extern const struct rvt_operation_params hfi1_post_parms[]; + /* * free_ahg - clear ahg from QP */ @@ -61,7 +63,7 @@ static inline void clear_ahg(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - priv->s_hdr->ahgcount = 0; + priv->s_ahg->ahgcount = 0; qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 9fb561682c66..a207717ade2a 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -50,46 +50,285 @@ #include <linux/vmalloc.h> #include "hfi.h" -#include "twsi.h" + +/* for the given bus number, return the CSR for reading an i2c line */ +static inline u32 i2c_in_csr(u32 bus_num) +{ + return bus_num ? ASIC_QSFP2_IN : ASIC_QSFP1_IN; +} + +/* for the given bus number, return the CSR for writing an i2c line */ +static inline u32 i2c_oe_csr(u32 bus_num) +{ + return bus_num ? ASIC_QSFP2_OE : ASIC_QSFP1_OE; +} + +static void hfi1_setsda(void *data, int state) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + struct hfi1_devdata *dd = bus->controlling_dd; + u64 reg; + u32 target_oe; + + target_oe = i2c_oe_csr(bus->num); + reg = read_csr(dd, target_oe); + /* + * The OE bit value is inverted and connected to the pin. When + * OE is 0 the pin is left to be pulled up, when the OE is 1 + * the pin is driven low. This matches the "open drain" or "open + * collector" convention. + */ + if (state) + reg &= ~QSFP_HFI0_I2CDAT; + else + reg |= QSFP_HFI0_I2CDAT; + write_csr(dd, target_oe, reg); + /* do a read to force the write into the chip */ + (void)read_csr(dd, target_oe); +} + +static void hfi1_setscl(void *data, int state) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + struct hfi1_devdata *dd = bus->controlling_dd; + u64 reg; + u32 target_oe; + + target_oe = i2c_oe_csr(bus->num); + reg = read_csr(dd, target_oe); + /* + * The OE bit value is inverted and connected to the pin. When + * OE is 0 the pin is left to be pulled up, when the OE is 1 + * the pin is driven low. This matches the "open drain" or "open + * collector" convention. + */ + if (state) + reg &= ~QSFP_HFI0_I2CCLK; + else + reg |= QSFP_HFI0_I2CCLK; + write_csr(dd, target_oe, reg); + /* do a read to force the write into the chip */ + (void)read_csr(dd, target_oe); +} + +static int hfi1_getsda(void *data) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + u64 reg; + u32 target_in; + + hfi1_setsda(data, 1); /* clear OE so we do not pull line down */ + udelay(2); /* 1us pull up + 250ns hold */ + + target_in = i2c_in_csr(bus->num); + reg = read_csr(bus->controlling_dd, target_in); + return !!(reg & QSFP_HFI0_I2CDAT); +} + +static int hfi1_getscl(void *data) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + u64 reg; + u32 target_in; + + hfi1_setscl(data, 1); /* clear OE so we do not pull line down */ + udelay(2); /* 1us pull up + 250ns hold */ + + target_in = i2c_in_csr(bus->num); + reg = read_csr(bus->controlling_dd, target_in); + return !!(reg & QSFP_HFI0_I2CCLK); +} /* - * QSFP support for hfi driver, using "Two Wire Serial Interface" driver - * in twsi.c + * Allocate and initialize the given i2c bus number. + * Returns NULL on failure. */ -#define I2C_MAX_RETRY 4 +static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, + struct hfi1_asic_data *ad, int num) +{ + struct hfi1_i2c_bus *bus; + int ret; + + bus = kzalloc(sizeof(*bus), GFP_KERNEL); + if (!bus) + return NULL; + + bus->controlling_dd = dd; + bus->num = num; /* our bus number */ + + bus->algo.setsda = hfi1_setsda; + bus->algo.setscl = hfi1_setscl; + bus->algo.getsda = hfi1_getsda; + bus->algo.getscl = hfi1_getscl; + bus->algo.udelay = 5; + bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.data = bus; + + bus->adapter.owner = THIS_MODULE; + bus->adapter.algo_data = &bus->algo; + bus->adapter.dev.parent = &dd->pcidev->dev; + snprintf(bus->adapter.name, sizeof(bus->adapter.name), + "hfi1_i2c%d", num); + + ret = i2c_bit_add_bus(&bus->adapter); + if (ret) { + dd_dev_info(dd, "%s: unable to add i2c bus %d, err %d\n", + __func__, num, ret); + kfree(bus); + return NULL; + } + + return bus; +} /* - * Raw i2c write. No set-up or lock checking. + * Initialize i2c buses. + * Return 0 on success, -errno on error. */ -static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, - int offset, void *bp, int len) +int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { - struct hfi1_devdata *dd = ppd->dd; - int ret, cnt; - u8 *buff = bp; + ad->i2c_bus0 = init_i2c_bus(dd, ad, 0); + ad->i2c_bus1 = init_i2c_bus(dd, ad, 1); + if (!ad->i2c_bus0 || !ad->i2c_bus1) + return -ENOMEM; + return 0; +}; - cnt = 0; - while (cnt < len) { - int wlen = len - cnt; +static void clean_i2c_bus(struct hfi1_i2c_bus *bus) +{ + if (bus) { + i2c_del_adapter(&bus->adapter); + kfree(bus); + } +} - ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset, - buff + cnt, wlen); - if (ret) { - /* hfi1_twsi_blk_wr() 1 for error, else 0 */ - return -EIO; - } - offset += wlen; - cnt += wlen; +void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) +{ + clean_i2c_bus(ad->i2c_bus0); + ad->i2c_bus0 = NULL; + clean_i2c_bus(ad->i2c_bus1); + ad->i2c_bus1 = NULL; +} + +static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, + u8 slave_addr, int offset, int offset_size, + u8 *data, u16 len) +{ + int ret; + int num_msgs; + u8 offset_bytes[2]; + struct i2c_msg msgs[2]; + + switch (offset_size) { + case 0: + num_msgs = 1; + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = len; + msgs[0].buf = data; + break; + case 2: + offset_bytes[1] = (offset >> 8) & 0xff; + /* fall through */ + case 1: + num_msgs = 2; + offset_bytes[0] = offset & 0xff; + + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = offset_size; + msgs[0].buf = offset_bytes; + + msgs[1].addr = slave_addr; + msgs[1].flags = I2C_M_NOSTART, + msgs[1].len = len; + msgs[1].buf = data; + break; + default: + return -EINVAL; } - /* Must wait min 20us between qsfp i2c transactions */ - udelay(20); + i2c->controlling_dd = dd; + ret = i2c_transfer(&i2c->adapter, msgs, num_msgs); + if (ret != num_msgs) { + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; write failed, ret %d\n", + __func__, i2c->num, slave_addr, offset, len, ret); + return ret < 0 ? ret : -EIO; + } + return 0; +} + +static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, + u8 slave_addr, int offset, int offset_size, + u8 *data, u16 len) +{ + int ret; + int num_msgs; + u8 offset_bytes[2]; + struct i2c_msg msgs[2]; + + switch (offset_size) { + case 0: + num_msgs = 1; + msgs[0].addr = slave_addr; + msgs[0].flags = I2C_M_RD; + msgs[0].len = len; + msgs[0].buf = data; + break; + case 2: + offset_bytes[1] = (offset >> 8) & 0xff; + /* fall through */ + case 1: + num_msgs = 2; + offset_bytes[0] = offset & 0xff; + + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = offset_size; + msgs[0].buf = offset_bytes; + + msgs[1].addr = slave_addr; + msgs[1].flags = I2C_M_RD, + msgs[1].len = len; + msgs[1].buf = data; + break; + default: + return -EINVAL; + } - return cnt; + bus->controlling_dd = dd; + ret = i2c_transfer(&bus->adapter, msgs, num_msgs); + if (ret != num_msgs) { + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; read failed, ret %d\n", + __func__, bus->num, slave_addr, offset, len, ret); + return ret < 0 ? ret : -EIO; + } + return 0; +} + +/* + * Raw i2c write. No set-up or lock checking. + * + * Return 0 on success, -errno on error. + */ +static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, + int offset, void *bp, int len) +{ + struct hfi1_devdata *dd = ppd->dd; + struct hfi1_i2c_bus *bus; + u8 slave_addr; + int offset_size; + + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ + offset_size = (i2c_addr >> 8) & 0x3; + return i2c_bus_write(dd, bus, slave_addr, offset, offset_size, bp, len); } /* * Caller must hold the i2c chain resource. + * + * Return number of bytes written, or -errno. */ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -99,63 +338,36 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C chain %d write interface reset failed\n", - target); + ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); + if (ret) return ret; - } - return __i2c_write(ppd, target, i2c_addr, offset, bp, len); + return len; } /* * Raw i2c read. No set-up or lock checking. + * + * Return 0 on success, -errno on error. */ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) { struct hfi1_devdata *dd = ppd->dd; - int ret, cnt, pass = 0; - int orig_offset = offset; - - cnt = 0; - while (cnt < len) { - int rlen = len - cnt; - - ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset, - bp + cnt, rlen); - /* Some QSFP's fail first try. Retry as experiment */ - if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY) - continue; - if (ret) { - /* hfi1_twsi_blk_rd() 1 for error, else 0 */ - ret = -EIO; - goto exit; - } - offset += rlen; - cnt += rlen; - } - - ret = cnt; - -exit: - if (ret < 0) { - hfi1_dev_porterr(dd, ppd->port, - "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n", - target, i2c_addr, orig_offset, len); - } - - /* Must wait min 20us between qsfp i2c transactions */ - udelay(20); - - return ret; + struct hfi1_i2c_bus *bus; + u8 slave_addr; + int offset_size; + + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ + offset_size = (i2c_addr >> 8) & 0x3; + return i2c_bus_read(dd, bus, slave_addr, offset, offset_size, bp, len); } /* * Caller must hold the i2c chain resource. + * + * Return number of bytes read, or -errno. */ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -165,16 +377,11 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C chain %d read interface reset failed\n", - target); + ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); + if (ret) return ret; - } - return __i2c_read(ppd, target, i2c_addr, offset, bp, len); + return len; } /* @@ -182,6 +389,8 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, * by writing @addr = ((256 * n) + m) * * Caller must hold the i2c chain resource. + * + * Return number of bytes written or -errno. */ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -189,21 +398,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int count = 0; int offset; int nwrite; - int ret; + int ret = 0; u8 page; if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP chain %d write interface reset failed\n", - target); - return ret; - } - while (count < len) { /* * Set the qsfp page based on a zero-based address @@ -213,11 +413,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); - if (ret != 1) { + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", target, ret); - ret = -EIO; break; } @@ -229,11 +430,13 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, offset, bp + count, nwrite); - if (ret <= 0) /* stop on error or nothing written */ + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) /* stop on error */ break; - count += ret; - addr += ret; + count += nwrite; + addr += nwrite; } if (ret < 0) @@ -243,7 +446,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, /* * Perform a stand-alone single QSFP write. Acquire the resource, do the - * read, then release the resource. + * write, then release the resource. */ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -266,6 +469,8 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, * by reading @addr = ((256 * n) + m) * * Caller must hold the i2c chain resource. + * + * Return the number of bytes read or -errno. */ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -273,21 +478,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int count = 0; int offset; int nread; - int ret; + int ret = 0; u8 page; if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP chain %d read interface reset failed\n", - target); - return ret; - } - while (count < len) { /* * Set the qsfp page based on a zero-based address @@ -296,11 +492,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, page = (u8)(addr / QSFP_PAGESIZE); ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); - if (ret != 1) { + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", target, ret); - ret = -EIO; break; } @@ -310,15 +507,13 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY) nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); - /* QSFPs require a 5-10msec delay after write operations */ - mdelay(5); ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, offset, bp + count, nread); - if (ret <= 0) /* stop on error or nothing read */ + if (ret) /* stop on error */ break; - count += ret; - addr += ret; + count += nread; + addr += nread; } if (ret < 0) diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index dadc66c442b9..69275ebd9597 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -238,3 +238,6 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); +struct hfi1_asic_data; +int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad); +void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 792f15eb8efe..5da190e6011b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -477,6 +477,37 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } + /* + * Local operations are processed immediately + * after all prior requests have completed + */ + if (wqe->wr.opcode == IB_WR_REG_MR || + wqe->wr.opcode == IB_WR_LOCAL_INV) { + int local_ops = 0; + int err = 0; + + if (qp->s_last != qp->s_cur) + goto bail; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + if (++qp->s_tail == qp->s_size) + qp->s_tail = 0; + if (!(wqe->wr.send_flags & + RVT_SEND_COMPLETION_ONLY)) { + err = rvt_invalidate_rkey( + qp, + wqe->wr.ex.invalidate_rkey); + local_ops = 1; + } + hfi1_send_complete(qp, wqe, + err ? IB_WC_LOC_PROT_ERR + : IB_WC_SUCCESS); + if (local_ops) + atomic_dec(&qp->local_ops_pending); + qp->s_hdrwords = 0; + goto done_free_tx; + } + newreq = 1; qp->s_psn = wqe->psn; } @@ -491,6 +522,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) switch (wqe->wr.opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { @@ -504,11 +536,17 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_ONLY); - } else { + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; + } else { + qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE); + /* Invalidate rkey comes after the BTH */ + ohdr->u.ieth = cpu_to_be32( + wqe->wr.ex.invalidate_rkey); + hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -671,11 +709,16 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_LAST); - } else { + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; + } else { + qp->s_state = OP(SEND_LAST_WITH_INVALIDATE); + /* invalidate data comes after the BTH */ + ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey); + hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -1047,7 +1090,7 @@ void hfi1_rc_timeout(unsigned long arg) ibp->rvp.n_rc_timeouts++; qp->s_flags &= ~RVT_S_TIMER; del_timer(&qp->s_timer); - trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); + trace_hfi1_timeout(qp, qp->s_last_psn + 1); restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); } @@ -1171,7 +1214,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) * If we were waiting for sends to complete before re-sending, * and they are now complete, restart sending. */ - trace_hfi1_rc_sendcomplete(qp, psn); + trace_hfi1_sendcomplete(qp, psn); if (qp->s_flags & RVT_S_WAIT_PSN && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { qp->s_flags &= ~RVT_S_WAIT_PSN; @@ -1567,7 +1610,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, spin_lock_irqsave(&qp->s_lock, flags); - trace_hfi1_rc_ack(qp, psn); + trace_hfi1_ack(qp, psn); /* Ignore invalid responses. */ smp_read_barrier_depends(); /* see post_one_send */ @@ -1782,7 +1825,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, u8 i, prev; int old_req; - trace_hfi1_rc_rcv_error(qp, psn); + trace_hfi1_rcv_error(qp, psn); if (diff > 0) { /* * Packet sequence error. @@ -2086,7 +2129,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; @@ -2097,30 +2139,15 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int diff; struct ib_reth *reth; unsigned long flags; - u32 bth1; int ret, is_fecn = 0; int copy_last = 0; + u32 rkey; bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { - if (bth1 & HFI1_BECN_SMASK) { - u16 rlid = qp->remote_ah_attr.dlid; - u32 lqpn, rqpn; - - lqpn = qp->ibqp.qp_num; - rqpn = qp->remote_qpn; - process_becn( - ppd, - qp->remote_ah_attr.sl, - rlid, lqpn, rqpn, - IB_CC_SVCTYPE_RC); - } - is_fecn = bth1 & HFI1_FECN_SMASK; - } + is_fecn = process_ecn(qp, packet, false); psn = be32_to_cpu(ohdr->bth[2]); opcode = (bth0 >> 24) & 0xff; @@ -2154,7 +2181,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) case OP(SEND_MIDDLE): if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(SEND_LAST_WITH_INVALIDATE)) break; goto nack_inv; @@ -2170,6 +2198,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(SEND_LAST_WITH_INVALIDATE) || opcode == OP(RDMA_WRITE_MIDDLE) || opcode == OP(RDMA_WRITE_LAST) || opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) @@ -2218,6 +2247,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): + case OP(SEND_ONLY_WITH_INVALIDATE): ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; @@ -2226,12 +2256,22 @@ send_middle: qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto no_immediate_data; + if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) + goto send_last_inv; /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; + case OP(SEND_LAST_WITH_INVALIDATE): +send_last_inv: + rkey = be32_to_cpu(ohdr->u.ieth); + if (rvt_invalidate_rkey(qp, rkey)) + goto no_immediate_data; + wc.ex.invalidate_rkey = rkey; + wc.wc_flags = IB_WC_WITH_INVALIDATE; + goto send_last; case OP(RDMA_WRITE_LAST): copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user; /* fall through */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a659aec3c3c6..48d5094f98e2 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -372,6 +372,7 @@ static void ruc_loopback(struct rvt_qp *sqp) int ret; int copy_last = 0; u32 to; + int local_ops = 0; rcu_read_lock(); @@ -440,11 +441,31 @@ again: sqp->s_sge.num_sge = wqe->wr.num_sge; sqp->s_len = wqe->length; switch (wqe->wr.opcode) { + case IB_WR_REG_MR: + goto send_comp; + + case IB_WR_LOCAL_INV: + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { + if (rvt_invalidate_rkey(sqp, + wqe->wr.ex.invalidate_rkey)) + send_status = IB_WC_LOC_PROT_ERR; + local_ops = 1; + } + goto send_comp; + + case IB_WR_SEND_WITH_INV: + if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { + wc.wc_flags = IB_WC_WITH_INVALIDATE; + wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; + } + goto send; + case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: +send: ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; @@ -583,6 +604,10 @@ send_comp: flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; hfi1_send_complete(sqp, wqe, send_status); + if (local_ops) { + atomic_dec(&sqp->local_ops_pending); + local_ops = 0; + } goto again; rnr_nak: @@ -683,10 +708,10 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -#define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4) +#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4) /** - * build_ahg - create ahg in s_hdr + * build_ahg - create ahg in s_ahg * @qp: a pointer to QP * @npsn: the next PSN for the request/response * @@ -708,19 +733,18 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); if (qp->s_ahgidx >= 0) { qp->s_ahgpsn = npsn; - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY; + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ - priv->s_hdr->sde = priv->s_sde; - priv->s_hdr->ahgidx = qp->s_ahgidx; + priv->s_ahg->ahgidx = qp->s_ahgidx; qp->s_flags |= RVT_S_AHG_VALID; } } else { /* subsequent middle after valid */ if (qp->s_ahgidx >= 0) { - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG; - priv->s_hdr->ahgidx = qp->s_ahgidx; - priv->s_hdr->ahgcount++; - priv->s_hdr->ahgdesc[0] = + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG; + priv->s_ahg->ahgidx = qp->s_ahgidx; + priv->s_ahg->ahgcount++; + priv->s_ahg->ahgdesc[0] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16((u16)npsn), BTH2_OFFSET, @@ -728,8 +752,8 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) 16); if ((npsn & 0xffff0000) != (qp->s_ahgpsn & 0xffff0000)) { - priv->s_hdr->ahgcount++; - priv->s_hdr->ahgdesc[1] = + priv->s_ahg->ahgcount++; + priv->s_ahg->ahgdesc[1] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16( (u16)(npsn >> 16)), @@ -766,7 +790,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, } lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; /* - * reset s_hdr/AHG fields + * reset s_ahg/AHG fields * * This insures that the ahgentry/ahgcount * are at a non-AHG default to protect @@ -776,10 +800,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, * build_ahg() will modify as appropriate * to use the AHG feature. */ - priv->s_hdr->tx_flags = 0; - priv->s_hdr->ahgcount = 0; - priv->s_hdr->ahgidx = 0; - priv->s_hdr->sde = NULL; + priv->s_ahg->tx_flags = 0; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; else @@ -890,7 +913,7 @@ void hfi1_do_send(struct rvt_qp *qp) */ if (hfi1_verbs_send(qp, &ps)) return; - /* Record that s_hdr is empty. */ + /* Record that s_ahg is empty. */ qp->s_hdrwords = 0; /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 91fc2aed6aed..74c84c655f7e 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -49,6 +49,7 @@ #include "hfi.h" #include "mad.h" #include "trace.h" +#include "affinity.h" /* * Start of per-port congestion control structures and support code @@ -622,6 +623,27 @@ static ssize_t show_tempsense(struct device *device, return ret; } +static ssize_t show_sdma_affinity(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct hfi1_ibdev *dev = + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + struct hfi1_devdata *dd = dd_from_dev(dev); + + return hfi1_get_sdma_affinity(dd, buf); +} + +static ssize_t store_sdma_affinity(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hfi1_ibdev *dev = + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + struct hfi1_devdata *dd = dd_from_dev(dev); + + return hfi1_set_sdma_affinity(dd, buf, count); +} + /* * end of per-unit (or driver, in some cases, but replicated * per unit) functions @@ -636,6 +658,8 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); +static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, + store_sdma_affinity); static struct device_attribute *hfi1_attributes[] = { &dev_attr_hw_rev, @@ -646,6 +670,7 @@ static struct device_attribute *hfi1_attributes[] = { &dev_attr_boardversion, &dev_attr_tempsense, &dev_attr_chip_reset, + &dev_attr_sdma_affinity, }; int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 28c1d0832886..92dc88f013c9 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -44,1329 +44,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR hfi1 - -#if !defined(__HFI1_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __HFI1_TRACE_H - -#include <linux/tracepoint.h> -#include <linux/trace_seq.h> - -#include "hfi.h" -#include "mad.h" -#include "sdma.h" - -#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) -#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_rx - -TRACE_EVENT(hfi1_rcvhdr, - TP_PROTO(struct hfi1_devdata *dd, - u32 ctxt, - u64 eflags, - u32 etype, - u32 hlen, - u32 tlen, - u32 updegr, - u32 etail - ), - TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u64, eflags) - __field(u32, ctxt) - __field(u32, etype) - __field(u32, hlen) - __field(u32, tlen) - __field(u32, updegr) - __field(u32, etail) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->eflags = eflags; - __entry->ctxt = ctxt; - __entry->etype = etype; - __entry->hlen = hlen; - __entry->tlen = tlen; - __entry->updegr = updegr; - __entry->etail = etail; - ), - TP_printk( - "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", - __get_str(dev), - __entry->ctxt, - __entry->eflags, - __entry->etype, show_packettype(__entry->etype), - __entry->hlen, - __entry->tlen, - __entry->updegr, - __entry->etail - ) -); - -TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), - TP_ARGS(dd, ctxt), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u32, ctxt) - __field(u8, slow_path) - __field(u8, dma_rtail) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt) { - __entry->slow_path = 1; - __entry->dma_rtail = 0xFF; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_dma_rtail){ - __entry->dma_rtail = 1; - __entry->slow_path = 0; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_nodma_rtail) { - __entry->dma_rtail = 0; - __entry->slow_path = 0; - } - ), - TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", - __get_str(dev), - __entry->ctxt, - __entry->slow_path, - __entry->dma_rtail - ) -); - -TRACE_EVENT(hfi1_exp_tid_reg, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, - u32 npages, unsigned long va, unsigned long pa, - dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_exp_tid_inval, - TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr, - u32 npages, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(unsigned long, va) - __field(u32, rarr) - __field(u32, npages) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->va = va; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_mmu_invalidate, - TP_PROTO(unsigned ctxt, u16 subctxt, const char *type, - unsigned long start, unsigned long end), - TP_ARGS(ctxt, subctxt, type, start, end), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __string(type, type) - __field(unsigned long, start) - __field(unsigned long, end) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __assign_str(type, type); - __entry->start = start; - __entry->end = end; - ), - TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", - __entry->ctxt, - __entry->subctxt, - __get_str(type), - __entry->start, - __entry->end - ) - ); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_tx - -TRACE_EVENT(hfi1_piofree, - TP_PROTO(struct send_context *sc, int extra), - TP_ARGS(sc, extra), - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(int, extra) - ), - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->extra = extra; - ), - TP_printk("[%s] ctxt %u(%u) extra %d", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->extra - ) -); - -TRACE_EVENT(hfi1_wantpiointr, - TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), - TP_ARGS(sc, needint, credit_ctrl), - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(u32, needint) - __field(u64, credit_ctrl) - ), - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->needint = needint; - __entry->credit_ctrl = credit_ctrl; - ), - TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->needint, - (unsigned long long)__entry->credit_ctrl - ) -); - -DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, flags) - __field(u32, s_flags) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->flags = flags; - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - ), - TP_printk( - "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", - __get_str(dev), - __entry->qpn, - __entry->flags, - __entry->s_flags - ) -); - -DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags)); - -DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_ibhdrs - -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); -const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); - -#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) - -const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); - -#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) - -#define lrh_name(lrh) { HFI1_##lrh, #lrh } -#define show_lnh(lrh) \ -__print_symbolic(lrh, \ - lrh_name(LRH_BTH), \ - lrh_name(LRH_GRH)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ - ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) - -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" -#define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" - -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, - TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) - __field(u8, lver) - __field(u8, sl) - __field(u8, lnh) - __field(u16, dlid) - __field(u16, len) - __field(u16, slid) - /* BTH */ - __field(u8, opcode) - __field(u8, se) - __field(u8, m) - __field(u8, pad) - __field(u8, tver) - __field(u16, pkey) - __field(u8, f) - __field(u8, b) - __field(u32, qpn) - __field(u8, a) - __field(u32, psn) - /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) - ), - TP_fast_assign( - struct hfi1_other_headers *ohdr; - - DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & - HFI1_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & - HFI1_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; - /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) -); - -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -#define SNOOP_PRN \ - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_snoop - -TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct hfi1_ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, slid) - __field(u16, dlid) - __field(u32, qpn) - __field(u8, opcode) - __field(u8, sl) - __field(u16, pkey) - __field(u32, hdr_len) - __field(u32, data_len) - __field(u8, lnh) - __dynamic_array(u8, raw_hdr, hdr_len) - __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( - struct hfi1_other_headers *ohdr; - - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - DD_DEV_ASSIGN(dd); - __entry->slid = be16_to_cpu(hdr->lrh[3]); - __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->hdr_len = hdr_len; - __entry->data_len = data_len; - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); - memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk( - "[%s] " SNOOP_PRN, - __get_str(dev), - __entry->slid, - __entry->dlid, - __entry->qpn, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->sl, - __entry->pkey, - __entry->hdr_len, - __entry->data_len - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_ctxts - -#define UCTXT_FMT \ - "cred:%u, credaddr:0x%llx, piobase:0x%llx, rcvhdr_cnt:%u, " \ - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" -TRACE_EVENT(hfi1_uctxtdata, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), - TP_ARGS(dd, uctxt), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(u32, credits) - __field(u64, hw_free) - __field(u64, piobase) - __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) - __field(u32, eager_cnt) - __field(u64, rcvegr_phys) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = uctxt->ctxt; - __entry->credits = uctxt->sc->credits; - __entry->hw_free = (u64)uctxt->sc->hw_free; - __entry->piobase = (u64)uctxt->sc->base_addr; - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; - __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; - ), - TP_printk("[%s] ctxt %u " UCTXT_FMT, - __get_str(dev), - __entry->ctxt, - __entry->credits, - __entry->hw_free, - __entry->piobase, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, - __entry->eager_cnt, - __entry->rcvegr_phys - ) -); - -#define CINFO_FMT \ - "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" -TRACE_EVENT(hfi1_ctxt_info, - TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt, - struct hfi1_ctxt_info cinfo), - TP_ARGS(dd, ctxt, subctxt, cinfo), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(unsigned, subctxt) - __field(u16, egrtids) - __field(u16, rcvhdrq_cnt) - __field(u16, rcvhdrq_size) - __field(u16, sdma_ring_size) - __field(u32, rcvegr_size) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->egrtids = cinfo.egrtids; - __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; - __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; - __entry->sdma_ring_size = cinfo.sdma_ring_size; - __entry->rcvegr_size = cinfo.rcvegr_size; - ), - TP_printk("[%s] ctxt %u:%u " CINFO_FMT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->egrtids, - __entry->rcvegr_size, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_size, - __entry->sdma_ring_size - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_sma - -#define BCT_FORMAT \ - "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" - -#define BCT(field) \ - be16_to_cpu( \ - ((struct buffer_control *)__get_dynamic_array(bct))->field \ - ) - -DECLARE_EVENT_CLASS(hfi1_bct_template, - TP_PROTO(struct hfi1_devdata *dd, - struct buffer_control *bc), - TP_ARGS(dd, bc), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __dynamic_array(u8, bct, sizeof(*bc)) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - memcpy(__get_dynamic_array(bct), bc, - sizeof(*bc)); - ), - TP_printk(BCT_FORMAT, - BCT(overall_shared_limit), - - BCT(vl[0].dedicated), - BCT(vl[0].shared), - - BCT(vl[1].dedicated), - BCT(vl[1].shared), - - BCT(vl[2].dedicated), - BCT(vl[2].shared), - - BCT(vl[3].dedicated), - BCT(vl[3].shared), - - BCT(vl[4].dedicated), - BCT(vl[4].shared), - - BCT(vl[5].dedicated), - BCT(vl[5].shared), - - BCT(vl[6].dedicated), - BCT(vl[6].shared), - - BCT(vl[7].dedicated), - BCT(vl[7].shared), - - BCT(vl[15].dedicated), - BCT(vl[15].shared) - ) -); - -DEFINE_EVENT(hfi1_bct_template, bct_set, - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), - TP_ARGS(dd, bc)); - -DEFINE_EVENT(hfi1_bct_template, bct_get, - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), - TP_ARGS(dd, bc)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_sdma - -TRACE_EVENT(hfi1_sdma_descriptor, - TP_PROTO(struct sdma_engine *sde, - u64 desc0, - u64 desc1, - u16 e, - void *descp), - TP_ARGS(sde, desc0, desc1, e, descp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(void *, descp) - __field(u64, desc0) - __field(u64, desc1) - __field(u16, e) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->desc0 = desc0; - __entry->desc1 = desc1; - __entry->idx = sde->this_idx; - __entry->descp = descp; - __entry->e = e; - ), - TP_printk( - "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", - __get_str(dev), - __entry->idx, - __parse_sdma_flags(__entry->desc0, __entry->desc1), - (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & - SDMA_DESC0_PHY_ADDR_MASK, - (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & - SDMA_DESC1_GENERATION_MASK), - (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & - SDMA_DESC0_BYTE_COUNT_MASK), - __entry->desc0, - __entry->desc1, - __entry->descp, - __entry->e - ) -); - -TRACE_EVENT(hfi1_sdma_engine_select, - TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), - TP_ARGS(dd, sel, vl, idx), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u32, sel) - __field(u8, vl) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->sel = sel; - __entry->vl = vl; - __entry->idx = idx; - ), - TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", - __get_str(dev), - __entry->idx, - __entry->sel, - __entry->vl - ) -); - -DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, status) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->status = status; - __entry->idx = sde->this_idx; - ), - TP_printk("[%s] SDE(%u) status %llx", - __get_str(dev), - __entry->idx, - (unsigned long long)__entry->status - ) -); - -DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status) -); - -DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status) -); - -DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(int, aidx) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->idx = sde->this_idx; - __entry->aidx = aidx; - ), - TP_printk("[%s] SDE(%u) aidx %d", - __get_str(dev), - __entry->idx, - __entry->aidx - ) -); - -DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx)); - -DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx)); - -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER -TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO(struct sdma_engine *sde, - u16 hwhead, - u16 swhead, - struct sdma_txreq *txp - ), - TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - __entry->sn = txp ? txp->sn : ~0; - ), - TP_printk( - "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->sn, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) -); -#else -TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO(struct sdma_engine *sde, - u16 hwhead, u16 swhead, - struct sdma_txreq *txp - ), - TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - ), - TP_printk( - "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) -); -#endif - -DECLARE_EVENT_CLASS(hfi1_sdma_sn, - TP_PROTO(struct sdma_engine *sde, u64 sn), - TP_ARGS(sde, sn), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->sn = sn; - __entry->idx = sde->this_idx; - ), - TP_printk("[%s] SDE(%u) sn %llu", - __get_str(dev), - __entry->idx, - __entry->sn - ) -); - -DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, - TP_PROTO( - struct sdma_engine *sde, - u64 sn - ), - TP_ARGS(sde, sn) -); - -DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, - TP_PROTO(struct sdma_engine *sde, u64 sn), - TP_ARGS(sde, sn) -); - -#define USDMA_HDR_FORMAT \ - "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" - -TRACE_EVENT(hfi1_sdma_user_header, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, - struct hfi1_pkt_header *hdr, u32 tidval), - TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, req) - __field(__le32, pbc0) - __field(__le32, pbc1) - __field(__be32, lrh0) - __field(__be32, lrh1) - __field(__be32, bth0) - __field(__be32, bth1) - __field(__be32, bth2) - __field(__le32, kdeth0) - __field(__le32, kdeth1) - __field(__le32, kdeth2) - __field(__le32, kdeth3) - __field(__le32, kdeth4) - __field(__le32, kdeth5) - __field(__le32, kdeth6) - __field(__le32, kdeth7) - __field(__le32, kdeth8) - __field(u32, tidval) - ), - TP_fast_assign( - __le32 *pbc = (__le32 *)hdr->pbc; - __be32 *lrh = (__be32 *)hdr->lrh; - __be32 *bth = (__be32 *)hdr->bth; - __le32 *kdeth = (__le32 *)&hdr->kdeth; - - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->req = req; - __entry->pbc0 = pbc[0]; - __entry->pbc1 = pbc[1]; - __entry->lrh0 = be32_to_cpu(lrh[0]); - __entry->lrh1 = be32_to_cpu(lrh[1]); - __entry->bth0 = be32_to_cpu(bth[0]); - __entry->bth1 = be32_to_cpu(bth[1]); - __entry->bth2 = be32_to_cpu(bth[2]); - __entry->kdeth0 = kdeth[0]; - __entry->kdeth1 = kdeth[1]; - __entry->kdeth2 = kdeth[2]; - __entry->kdeth3 = kdeth[3]; - __entry->kdeth4 = kdeth[4]; - __entry->kdeth5 = kdeth[5]; - __entry->kdeth6 = kdeth[6]; - __entry->kdeth7 = kdeth[7]; - __entry->kdeth8 = kdeth[8]; - __entry->tidval = tidval; - ), - TP_printk(USDMA_HDR_FORMAT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->req, - __entry->pbc1, - __entry->pbc0, - __entry->lrh0, - __entry->lrh1, - __entry->bth0, - __entry->bth1, - __entry->bth2, - __entry->kdeth0, - __entry->kdeth1, - __entry->kdeth2, - __entry->kdeth3, - __entry->kdeth4, - __entry->kdeth5, - __entry->kdeth6, - __entry->kdeth7, - __entry->kdeth8, - __entry->tidval - ) - ); - -#define SDMA_UREQ_FMT \ - "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" -TRACE_EVENT(hfi1_sdma_user_reqinfo, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), - TP_ARGS(dd, ctxt, subctxt, i), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd); - __field(u16, ctxt) - __field(u8, subctxt) - __field(u8, ver_opcode) - __field(u8, iovcnt) - __field(u16, npkts) - __field(u16, fragsize) - __field(u16, comp_idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->ver_opcode = i[0] & 0xff; - __entry->iovcnt = (i[0] >> 8) & 0xff; - __entry->npkts = i[1]; - __entry->fragsize = i[2]; - __entry->comp_idx = i[3]; - ), - TP_printk(SDMA_UREQ_FMT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->ver_opcode, - __entry->iovcnt, - __entry->npkts, - __entry->fragsize, - __entry->comp_idx - ) - ); - -#define usdma_complete_name(st) { st, #st } -#define show_usdma_complete_state(st) \ - __print_symbolic(st, \ - usdma_complete_name(FREE), \ - usdma_complete_name(QUEUED), \ - usdma_complete_name(COMPLETE), \ - usdma_complete_name(ERROR)) - -TRACE_EVENT(hfi1_sdma_user_completion, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, - u8 state, int code), - TP_ARGS(dd, ctxt, subctxt, idx, state, code), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, idx) - __field(u8, state) - __field(int, code) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->idx = idx; - __entry->state = state; - __entry->code = code; - ), - TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", - __get_str(dev), __entry->ctxt, __entry->subctxt, - __entry->idx, show_usdma_complete_state(__entry->state), - __entry->code) - ); - -const char *print_u32_array(struct trace_seq *, u32 *, int); -#define __print_u32_hex(arr, len) print_u32_array(p, arr, len) - -TRACE_EVENT(hfi1_sdma_user_header_ahg, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, - u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), - TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, req) - __field(u8, sde) - __field(u8, idx) - __field(int, len) - __field(u32, tidval) - __array(u32, ahg, 10) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->req = req; - __entry->sde = sde; - __entry->idx = ahgidx; - __entry->len = len; - __entry->tidval = tidval; - memcpy(__entry->ahg, ahg, len * sizeof(u32)); - ), - TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->req, - __entry->sde, - __entry->idx, - __entry->len - 1, - __print_u32_hex(__entry->ahg, __entry->len), - __entry->tidval - ) - ); - -TRACE_EVENT(hfi1_sdma_state, - TP_PROTO(struct sdma_engine *sde, - const char *cstate, - const char *nstate - ), - TP_ARGS(sde, cstate, nstate), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __string(curstate, cstate) - __string(newstate, nstate) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __assign_str(curstate, cstate); - __assign_str(newstate, nstate); - ), - TP_printk("[%s] current state %s new state %s", - __get_str(dev), - __get_str(curstate), - __get_str(newstate) - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_rc - -DECLARE_EVENT_CLASS(hfi1_rc_template, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, s_flags) - __field(u32, psn) - __field(u32, s_psn) - __field(u32, s_next_psn) - __field(u32, s_sending_psn) - __field(u32, s_sending_hpsn) - __field(u32, r_psn) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - __entry->psn = psn; - __entry->s_psn = qp->s_psn; - __entry->s_next_psn = qp->s_next_psn; - __entry->s_sending_psn = qp->s_sending_psn; - __entry->s_sending_hpsn = qp->s_sending_hpsn; - __entry->r_psn = qp->r_psn; - ), - TP_printk( - "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", - __get_str(dev), - __entry->qpn, - __entry->s_flags, - __entry->psn, - __entry->s_psn, - __entry->s_next_psn, - __entry->s_sending_psn, - __entry->s_sending_hpsn, - __entry->r_psn - ) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_misc - -TRACE_EVENT(hfi1_interrupt, - TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, - int src), - TP_ARGS(dd, is_entry, src), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __array(char, buf, 64) - __field(int, src) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd) - is_entry->is_name(__entry->buf, 64, - src - is_entry->start); - __entry->src = src; - ), - TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, - __entry->src) -); - -/* - * Note: - * This produces a REALLY ugly trace in the console output when the string is - * too long. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_trace - -#define MAX_MSG_LEN 512 - -DECLARE_EVENT_CLASS(hfi1_trace_template, - TP_PROTO(const char *function, struct va_format *vaf), - TP_ARGS(function, vaf), - TP_STRUCT__entry(__string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign(__assign_str(function, function); - WARN_ON_ONCE(vsnprintf - (__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= - MAX_MSG_LEN); - ), - TP_printk("(%s) %s", - __get_str(function), - __get_str(msg)) -); - -/* - * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an - * actual function to work and can not be in a macro. - */ -#define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ - \ -DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ - TP_PROTO(const char *function, struct va_format *vaf), \ - TP_ARGS(function, vaf)) - -#define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ -{ \ - struct va_format vaf = { \ - .fmt = fmt, \ - }; \ - va_list args; \ - \ - va_start(args, fmt); \ - vaf.va = &args; \ - trace_hfi1_ ##lvl(func, &vaf); \ - va_end(args); \ - return; \ -} - -/* - * To create a new trace level simply define it below and as a __hfi1_trace_fn - * in trace.c. This will create all the hooks for calling - * hfi1_cdbg(LVL, fmt, ...); as well as take care of all - * the debugfs stuff. - */ -__hfi1_trace_def(PKT); -__hfi1_trace_def(PROC); -__hfi1_trace_def(SDMA); -__hfi1_trace_def(LINKVERB); -__hfi1_trace_def(DEBUG); -__hfi1_trace_def(SNOOP); -__hfi1_trace_def(CNTR); -__hfi1_trace_def(PIO); -__hfi1_trace_def(DC8051); -__hfi1_trace_def(FIRMWARE); -__hfi1_trace_def(RCVCTRL); -__hfi1_trace_def(TID); -__hfi1_trace_def(MMU); -__hfi1_trace_def(IOCTL); - -#define hfi1_cdbg(which, fmt, ...) \ - __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) - -#define hfi1_dbg(fmt, ...) \ - hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) - -/* - * Define HFI1_EARLY_DBG at compile time or here to enable early trace - * messages. Do not check in an enablement for this. - */ - -#ifdef HFI1_EARLY_DBG -#define hfi1_dbg_early(fmt, ...) \ - trace_printk(fmt, ##__VA_ARGS__) -#else -#define hfi1_dbg_early(fmt, ...) -#endif - -#endif /* __HFI1_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> +#include "trace_dbg.h" +#include "trace_misc.h" +#include "trace_ctxts.h" +#include "trace_ibhdrs.h" +#include "trace_rc.h" +#include "trace_rx.h" +#include "trace_tx.h" diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h new file mode 100644 index 000000000000..31654bbac1cf --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -0,0 +1,141 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_CTXTS_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_ctxts + +#define UCTXT_FMT \ + "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \ + "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" +TRACE_EVENT(hfi1_uctxtdata, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), + TP_ARGS(dd, uctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned int, ctxt) + __field(u32, credits) + __field(u64, hw_free) + __field(void __iomem *, piobase) + __field(u16, rcvhdrq_cnt) + __field(u64, rcvhdrq_phys) + __field(u32, eager_cnt) + __field(u64, rcvegr_phys) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = uctxt->ctxt; + __entry->credits = uctxt->sc->credits; + __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); + __entry->piobase = uctxt->sc->base_addr; + __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; + __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->eager_cnt = uctxt->egrbufs.alloced; + __entry->rcvegr_phys = + uctxt->egrbufs.rcvtids[0].phys; + ), + TP_printk("[%s] ctxt %u " UCTXT_FMT, + __get_str(dev), + __entry->ctxt, + __entry->credits, + __entry->hw_free, + __entry->piobase, + __entry->rcvhdrq_cnt, + __entry->rcvhdrq_phys, + __entry->eager_cnt, + __entry->rcvegr_phys + ) +); + +#define CINFO_FMT \ + "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" +TRACE_EVENT(hfi1_ctxt_info, + TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt, + unsigned int subctxt, + struct hfi1_ctxt_info cinfo), + TP_ARGS(dd, ctxt, subctxt, cinfo), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned int, ctxt) + __field(unsigned int, subctxt) + __field(u16, egrtids) + __field(u16, rcvhdrq_cnt) + __field(u16, rcvhdrq_size) + __field(u16, sdma_ring_size) + __field(u32, rcvegr_size) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->egrtids = cinfo.egrtids; + __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; + __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; + __entry->sdma_ring_size = cinfo.sdma_ring_size; + __entry->rcvegr_size = cinfo.rcvegr_size; + ), + TP_printk("[%s] ctxt %u:%u " CINFO_FMT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->egrtids, + __entry->rcvegr_size, + __entry->rcvhdrq_cnt, + __entry->rcvhdrq_size, + __entry->sdma_ring_size + ) +); + +#endif /* __HFI1_TRACE_CTXTS_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_ctxts +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h new file mode 100644 index 000000000000..0e7d929530c5 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -0,0 +1,155 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_EXTRA_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +/* + * Note: + * This produces a REALLY ugly trace in the console output when the string is + * too long. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_dbg + +#define MAX_MSG_LEN 512 + +DECLARE_EVENT_CLASS(hfi1_trace_template, + TP_PROTO(const char *function, struct va_format *vaf), + TP_ARGS(function, vaf), + TP_STRUCT__entry(__string(function, function) + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign(__assign_str(function, function); + WARN_ON_ONCE(vsnprintf + (__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= + MAX_MSG_LEN); + ), + TP_printk("(%s) %s", + __get_str(function), + __get_str(msg)) +); + +/* + * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an + * actual function to work and can not be in a macro. + */ +#define __hfi1_trace_def(lvl) \ +void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ + \ +DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ + TP_PROTO(const char *function, struct va_format *vaf), \ + TP_ARGS(function, vaf)) + +#define __hfi1_trace_fn(lvl) \ +void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ +{ \ + struct va_format vaf = { \ + .fmt = fmt, \ + }; \ + va_list args; \ + \ + va_start(args, fmt); \ + vaf.va = &args; \ + trace_hfi1_ ##lvl(func, &vaf); \ + va_end(args); \ + return; \ +} + +/* + * To create a new trace level simply define it below and as a __hfi1_trace_fn + * in trace.c. This will create all the hooks for calling + * hfi1_cdbg(LVL, fmt, ...); as well as take care of all + * the debugfs stuff. + */ +__hfi1_trace_def(PKT); +__hfi1_trace_def(PROC); +__hfi1_trace_def(SDMA); +__hfi1_trace_def(LINKVERB); +__hfi1_trace_def(DEBUG); +__hfi1_trace_def(SNOOP); +__hfi1_trace_def(CNTR); +__hfi1_trace_def(PIO); +__hfi1_trace_def(DC8051); +__hfi1_trace_def(FIRMWARE); +__hfi1_trace_def(RCVCTRL); +__hfi1_trace_def(TID); +__hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); + +#define hfi1_cdbg(which, fmt, ...) \ + __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) + +#define hfi1_dbg(fmt, ...) \ + hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) + +/* + * Define HFI1_EARLY_DBG at compile time or here to enable early trace + * messages. Do not check in an enablement for this. + */ + +#ifdef HFI1_EARLY_DBG +#define hfi1_dbg_early(fmt, ...) \ + trace_printk(fmt, ##__VA_ARGS__) +#else +#define hfi1_dbg_early(fmt, ...) +#endif + +#endif /* __HFI1_TRACE_EXTRA_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_dbg +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h new file mode 100644 index 000000000000..c3e41aed0034 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -0,0 +1,209 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_IBHDRS_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_ibhdrs + +u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); + +#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) + +#define lrh_name(lrh) { HFI1_##lrh, #lrh } +#define show_lnh(lrh) \ +__print_symbolic(lrh, \ + lrh_name(LRH_BTH), \ + lrh_name(LRH_GRH)) + +#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define BTH_PRN \ + "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ + "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" +#define EHDR_PRN "%s" + +DECLARE_EVENT_CLASS(hfi1_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + /* LRH */ + __field(u8, vl) + __field(u8, lver) + __field(u8, sl) + __field(u8, lnh) + __field(u16, dlid) + __field(u16, len) + __field(u16, slid) + /* BTH */ + __field(u8, opcode) + __field(u8, se) + __field(u8, m) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, f) + __field(u8, b) + __field(u32, qpn) + __field(u8, a) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + ), + TP_fast_assign( + struct hfi1_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + /* LRH */ + __entry->vl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); + __entry->lver = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; + __entry->sl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + __entry->lnh = + (u8)(be16_to_cpu(hdr->lrh[0]) & 3); + __entry->dlid = + be16_to_cpu(hdr->lrh[1]); + /* allow for larger len */ + __entry->len = + be16_to_cpu(hdr->lrh[2]); + __entry->slid = + be16_to_cpu(hdr->lrh[3]); + /* BTH */ + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + __entry->opcode = + (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; + __entry->se = + (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; + __entry->m = + (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; + __entry->pad = + (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + __entry->tver = + (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; + __entry->pkey = + be32_to_cpu(ohdr->bth[0]) & 0xffff; + __entry->f = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & + HFI1_FECN_MASK; + __entry->b = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & + HFI1_BECN_MASK; + __entry->qpn = + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + __entry->a = + (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; + /* allow for larger PSN */ + __entry->psn = + be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + ibhdr_exhdr_len(hdr)); + ), + TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, + __get_str(dev), + /* LRH */ + __entry->vl, + __entry->lver, + __entry->sl, + __entry->lnh, show_lnh(__entry->lnh), + __entry->dlid, + __entry->len, + __entry->slid, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->m, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->f, + __entry->b, + __entry->qpn, + __entry->a, + __entry->psn, + /* extended headers */ + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +#endif /* __HFI1_TRACE_IBHDRS_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_ibhdrs +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h new file mode 100644 index 000000000000..d308454af7fd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -0,0 +1,81 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_MISC_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_misc + +TRACE_EVENT(hfi1_interrupt, + TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, + int src), + TP_ARGS(dd, is_entry, src), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __array(char, buf, 64) + __field(int, src) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd) + is_entry->is_name(__entry->buf, 64, + src - is_entry->start); + __entry->src = src; + ), + TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, + __entry->src) +); + +#endif /* __HFI1_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_misc +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h new file mode 100644 index 000000000000..5ea5005f9f41 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_rc.h @@ -0,0 +1,123 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_RC_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_rc + +DECLARE_EVENT_CLASS(hfi1_rc_template, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, s_flags) + __field(u32, psn) + __field(u32, s_psn) + __field(u32, s_next_psn) + __field(u32, s_sending_psn) + __field(u32, s_sending_hpsn) + __field(u32, r_psn) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + __entry->psn = psn; + __entry->s_psn = qp->s_psn; + __entry->s_next_psn = qp->s_next_psn; + __entry->s_sending_psn = qp->s_sending_psn; + __entry->s_sending_hpsn = qp->s_sending_hpsn; + __entry->r_psn = qp->r_psn; + ), + TP_printk( + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", + __get_str(dev), + __entry->qpn, + __entry->s_flags, + __entry->psn, + __entry->s_psn, + __entry->s_next_psn, + __entry->s_sending_psn, + __entry->s_sending_hpsn, + __entry->r_psn + ) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_sendcomplete, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_ack, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +#endif /* __HFI1_TRACE_RC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rc +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h new file mode 100644 index 000000000000..9ba1f615ec95 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -0,0 +1,322 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_RX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_rx + +TRACE_EVENT(hfi1_rcvhdr, + TP_PROTO(struct hfi1_devdata *dd, + u32 ctxt, + u64 eflags, + u32 etype, + u32 hlen, + u32 tlen, + u32 updegr, + u32 etail + ), + TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, etype) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->eflags = eflags; + __entry->ctxt = ctxt; + __entry->etype = etype; + __entry->hlen = hlen; + __entry->tlen = tlen; + __entry->updegr = updegr; + __entry->etail = etail; + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + __entry->ctxt, + __entry->eflags, + __entry->etype, show_packettype(__entry->etype), + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) +); + +TRACE_EVENT(hfi1_receive_interrupt, + TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_ARGS(dd, ctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, ctxt) + __field(u8, slow_path) + __field(u8, dma_rtail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt) { + __entry->slow_path = 1; + __entry->dma_rtail = 0xFF; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_dma_rtail){ + __entry->dma_rtail = 1; + __entry->slow_path = 0; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_nodma_rtail) { + __entry->dma_rtail = 0; + __entry->slow_path = 0; + } + ), + TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", + __get_str(dev), + __entry->ctxt, + __entry->slow_path, + __entry->dma_rtail + ) +); + +TRACE_EVENT(hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, + u32 npages, unsigned long va, unsigned long pa, + dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_exp_tid_inval, + TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, + u32 npages, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(unsigned long, va) + __field(u32, rarr) + __field(u32, npages) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->va = va; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_mmu_invalidate, + TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type, + unsigned long start, unsigned long end), + TP_ARGS(ctxt, subctxt, type, start, end), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __string(type, type) + __field(unsigned long, start) + __field(unsigned long, end) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __assign_str(type, type); + __entry->start = start; + __entry->end = end; + ), + TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", + __entry->ctxt, + __entry->subctxt, + __get_str(type), + __entry->start, + __entry->end + ) + ); + +#define SNOOP_PRN \ + "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ + "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" + +TRACE_EVENT(snoop_capture, + TP_PROTO(struct hfi1_devdata *dd, + int hdr_len, + struct hfi1_ib_header *hdr, + int data_len, + void *data), + TP_ARGS(dd, hdr_len, hdr, data_len, data), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, slid) + __field(u16, dlid) + __field(u32, qpn) + __field(u8, opcode) + __field(u8, sl) + __field(u16, pkey) + __field(u32, hdr_len) + __field(u32, data_len) + __field(u8, lnh) + __dynamic_array(u8, raw_hdr, hdr_len) + __dynamic_array(u8, raw_pkt, data_len) + ), + TP_fast_assign( + struct hfi1_other_headers *ohdr; + + __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + DD_DEV_ASSIGN(dd); + __entry->slid = be16_to_cpu(hdr->lrh[3]); + __entry->dlid = be16_to_cpu(hdr->lrh[1]); + __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; + __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; + __entry->hdr_len = hdr_len; + __entry->data_len = data_len; + memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); + memcpy(__get_dynamic_array(raw_pkt), data, data_len); + ), + TP_printk( + "[%s] " SNOOP_PRN, + __get_str(dev), + __entry->slid, + __entry->dlid, + __entry->qpn, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->sl, + __entry->pkey, + __entry->hdr_len, + __entry->data_len + ) +); + +#endif /* __HFI1_TRACE_RX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rx +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h new file mode 100644 index 000000000000..415d6be42c5d --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -0,0 +1,642 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_TX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" +#include "mad.h" +#include "sdma.h" + +const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); + +#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_tx + +TRACE_EVENT(hfi1_piofree, + TP_PROTO(struct send_context *sc, int extra), + TP_ARGS(sc, extra), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(int, extra) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->extra = extra; + ), + TP_printk("[%s] ctxt %u(%u) extra %d", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->extra + ) +); + +TRACE_EVENT(hfi1_wantpiointr, + TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), + TP_ARGS(sc, needint, credit_ctrl), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(u32, needint) + __field(u64, credit_ctrl) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->needint = needint; + __entry->credit_ctrl = credit_ctrl; + ), + TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->needint, + (unsigned long long)__entry->credit_ctrl + ) +); + +DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, flags) + __field(u32, s_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->flags = flags; + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + ), + TP_printk( + "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", + __get_str(dev), + __entry->qpn, + __entry->flags, + __entry->s_flags + ) +); + +DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags)); + +DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags)); + +TRACE_EVENT(hfi1_sdma_descriptor, + TP_PROTO(struct sdma_engine *sde, + u64 desc0, + u64 desc1, + u16 e, + void *descp), + TP_ARGS(sde, desc0, desc1, e, descp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(void *, descp) + __field(u64, desc0) + __field(u64, desc1) + __field(u16, e) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->desc0 = desc0; + __entry->desc1 = desc1; + __entry->idx = sde->this_idx; + __entry->descp = descp; + __entry->e = e; + ), + TP_printk( + "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", + __get_str(dev), + __entry->idx, + __parse_sdma_flags(__entry->desc0, __entry->desc1), + (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & + SDMA_DESC0_PHY_ADDR_MASK, + (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & + SDMA_DESC1_GENERATION_MASK), + (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & + SDMA_DESC0_BYTE_COUNT_MASK), + __entry->desc0, + __entry->desc1, + __entry->descp, + __entry->e + ) +); + +TRACE_EVENT(hfi1_sdma_engine_select, + TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), + TP_ARGS(dd, sel, vl, idx), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, sel) + __field(u8, vl) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->sel = sel; + __entry->vl = vl; + __entry->idx = idx; + ), + TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", + __get_str(dev), + __entry->idx, + __entry->sel, + __entry->vl + ) +); + +DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, status) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->status = status; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) status %llx", + __get_str(dev), + __entry->idx, + (unsigned long long)__entry->status + ) +); + +DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) +); + +DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) +); + +DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(int, aidx) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->idx = sde->this_idx; + __entry->aidx = aidx; + ), + TP_printk("[%s] SDE(%u) aidx %d", + __get_str(dev), + __entry->idx, + __entry->aidx + ) +); + +DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx)); + +DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx)); + +#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER +TRACE_EVENT(hfi1_sdma_progress, + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, + u16 swhead, + struct sdma_txreq *txp + ), + TP_ARGS(sde, hwhead, swhead, txp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + __entry->sn = txp ? txp->sn : ~0; + ), + TP_printk( + "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->sn, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) +); +#else +TRACE_EVENT(hfi1_sdma_progress, + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, u16 swhead, + struct sdma_txreq *txp + ), + TP_ARGS(sde, hwhead, swhead, txp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + ), + TP_printk( + "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) +); +#endif + +DECLARE_EVENT_CLASS(hfi1_sdma_sn, + TP_PROTO(struct sdma_engine *sde, u64 sn), + TP_ARGS(sde, sn), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->sn = sn; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) sn %llu", + __get_str(dev), + __entry->idx, + __entry->sn + ) +); + +DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, + TP_PROTO( + struct sdma_engine *sde, + u64 sn + ), + TP_ARGS(sde, sn) +); + +DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, + TP_PROTO(struct sdma_engine *sde, u64 sn), + TP_ARGS(sde, sn) +); + +#define USDMA_HDR_FORMAT \ + "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" + +TRACE_EVENT(hfi1_sdma_user_header, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, + struct hfi1_pkt_header *hdr, u32 tidval), + TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, req) + __field(u32, pbc0) + __field(u32, pbc1) + __field(u32, lrh0) + __field(u32, lrh1) + __field(u32, bth0) + __field(u32, bth1) + __field(u32, bth2) + __field(u32, kdeth0) + __field(u32, kdeth1) + __field(u32, kdeth2) + __field(u32, kdeth3) + __field(u32, kdeth4) + __field(u32, kdeth5) + __field(u32, kdeth6) + __field(u32, kdeth7) + __field(u32, kdeth8) + __field(u32, tidval) + ), + TP_fast_assign( + __le32 *pbc = (__le32 *)hdr->pbc; + __be32 *lrh = (__be32 *)hdr->lrh; + __be32 *bth = (__be32 *)hdr->bth; + __le32 *kdeth = (__le32 *)&hdr->kdeth; + + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->req = req; + __entry->pbc0 = le32_to_cpu(pbc[0]); + __entry->pbc1 = le32_to_cpu(pbc[1]); + __entry->lrh0 = be32_to_cpu(lrh[0]); + __entry->lrh1 = be32_to_cpu(lrh[1]); + __entry->bth0 = be32_to_cpu(bth[0]); + __entry->bth1 = be32_to_cpu(bth[1]); + __entry->bth2 = be32_to_cpu(bth[2]); + __entry->kdeth0 = le32_to_cpu(kdeth[0]); + __entry->kdeth1 = le32_to_cpu(kdeth[1]); + __entry->kdeth2 = le32_to_cpu(kdeth[2]); + __entry->kdeth3 = le32_to_cpu(kdeth[3]); + __entry->kdeth4 = le32_to_cpu(kdeth[4]); + __entry->kdeth5 = le32_to_cpu(kdeth[5]); + __entry->kdeth6 = le32_to_cpu(kdeth[6]); + __entry->kdeth7 = le32_to_cpu(kdeth[7]); + __entry->kdeth8 = le32_to_cpu(kdeth[8]); + __entry->tidval = tidval; + ), + TP_printk(USDMA_HDR_FORMAT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->req, + __entry->pbc1, + __entry->pbc0, + __entry->lrh0, + __entry->lrh1, + __entry->bth0, + __entry->bth1, + __entry->bth2, + __entry->kdeth0, + __entry->kdeth1, + __entry->kdeth2, + __entry->kdeth3, + __entry->kdeth4, + __entry->kdeth5, + __entry->kdeth6, + __entry->kdeth7, + __entry->kdeth8, + __entry->tidval + ) +); + +#define SDMA_UREQ_FMT \ + "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" +TRACE_EVENT(hfi1_sdma_user_reqinfo, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), + TP_ARGS(dd, ctxt, subctxt, i), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd); + __field(u16, ctxt) + __field(u8, subctxt) + __field(u8, ver_opcode) + __field(u8, iovcnt) + __field(u16, npkts) + __field(u16, fragsize) + __field(u16, comp_idx) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->ver_opcode = i[0] & 0xff; + __entry->iovcnt = (i[0] >> 8) & 0xff; + __entry->npkts = i[1]; + __entry->fragsize = i[2]; + __entry->comp_idx = i[3]; + ), + TP_printk(SDMA_UREQ_FMT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->ver_opcode, + __entry->iovcnt, + __entry->npkts, + __entry->fragsize, + __entry->comp_idx + ) +); + +#define usdma_complete_name(st) { st, #st } +#define show_usdma_complete_state(st) \ + __print_symbolic(st, \ + usdma_complete_name(FREE), \ + usdma_complete_name(QUEUED), \ + usdma_complete_name(COMPLETE), \ + usdma_complete_name(ERROR)) + +TRACE_EVENT(hfi1_sdma_user_completion, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, + u8 state, int code), + TP_ARGS(dd, ctxt, subctxt, idx, state, code), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, idx) + __field(u8, state) + __field(int, code) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->idx = idx; + __entry->state = state; + __entry->code = code; + ), + TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", + __get_str(dev), __entry->ctxt, __entry->subctxt, + __entry->idx, show_usdma_complete_state(__entry->state), + __entry->code) +); + +const char *print_u32_array(struct trace_seq *, u32 *, int); +#define __print_u32_hex(arr, len) print_u32_array(p, arr, len) + +TRACE_EVENT(hfi1_sdma_user_header_ahg, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, + u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), + TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, req) + __field(u8, sde) + __field(u8, idx) + __field(int, len) + __field(u32, tidval) + __array(u32, ahg, 10) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->req = req; + __entry->sde = sde; + __entry->idx = ahgidx; + __entry->len = len; + __entry->tidval = tidval; + memcpy(__entry->ahg, ahg, len * sizeof(u32)); + ), + TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->req, + __entry->sde, + __entry->idx, + __entry->len - 1, + __print_u32_hex(__entry->ahg, __entry->len), + __entry->tidval + ) +); + +TRACE_EVENT(hfi1_sdma_state, + TP_PROTO(struct sdma_engine *sde, + const char *cstate, + const char *nstate + ), + TP_ARGS(sde, cstate, nstate), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __string(curstate, cstate) + __string(newstate, nstate) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __assign_str(curstate, cstate); + __assign_str(newstate, nstate); + ), + TP_printk("[%s] current state %s new state %s", + __get_str(dev), + __get_str(curstate), + __get_str(newstate) + ) +); + +#define BCT_FORMAT \ + "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" + +#define BCT(field) \ + be16_to_cpu( \ + ((struct buffer_control *)__get_dynamic_array(bct))->field \ + ) + +DECLARE_EVENT_CLASS(hfi1_bct_template, + TP_PROTO(struct hfi1_devdata *dd, + struct buffer_control *bc), + TP_ARGS(dd, bc), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __dynamic_array(u8, bct, sizeof(*bc)) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + memcpy(__get_dynamic_array(bct), bc, + sizeof(*bc)); + ), + TP_printk(BCT_FORMAT, + BCT(overall_shared_limit), + + BCT(vl[0].dedicated), + BCT(vl[0].shared), + + BCT(vl[1].dedicated), + BCT(vl[1].shared), + + BCT(vl[2].dedicated), + BCT(vl[2].shared), + + BCT(vl[3].dedicated), + BCT(vl[3].shared), + + BCT(vl[4].dedicated), + BCT(vl[4].shared), + + BCT(vl[5].dedicated), + BCT(vl[5].shared), + + BCT(vl[6].dedicated), + BCT(vl[6].shared), + + BCT(vl[7].dedicated), + BCT(vl[7].shared), + + BCT(vl[15].dedicated), + BCT(vl[15].shared) + ) +); + +DEFINE_EVENT(hfi1_bct_template, bct_set, + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), + TP_ARGS(dd, bc)); + +DEFINE_EVENT(hfi1_bct_template, bct_get, + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), + TP_ARGS(dd, bc)); + +#endif /* __HFI1_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c deleted file mode 100644 index e82e52a63d35..000000000000 --- a/drivers/infiniband/hw/hfi1/twsi.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> - -#include "hfi.h" -#include "twsi.h" - -/* - * "Two Wire Serial Interface" support. - * - * Originally written for a not-quite-i2c serial eeprom, which is - * still used on some supported boards. Later boards have added a - * variety of other uses, most board-specific, so the bit-boffing - * part has been split off to this file, while the other parts - * have been moved to chip-specific files. - * - * We have also dropped all pretense of fully generic (e.g. pretend - * we don't know whether '1' is the higher voltage) interface, as - * the restrictions of the generic i2c interface (e.g. no access from - * driver itself) make it unsuitable for this use. - */ - -#define READ_CMD 1 -#define WRITE_CMD 0 - -/** - * i2c_wait_for_writes - wait for a write - * @dd: the hfi1_ib device - * - * We use this instead of udelay directly, so we can make sure - * that previous register writes have been flushed all the way - * to the chip. Since we are delaying anyway, the cost doesn't - * hurt, and makes the bit twiddling more regular - */ -static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target) -{ - /* - * implicit read of EXTStatus is as good as explicit - * read of scratch, if all we want to do is flush - * writes. - */ - hfi1_gpio_mod(dd, target, 0, 0, 0); - rmb(); /* inlined, so prevent compiler reordering */ -} - -/* - * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that - * for "almost compliant" modules - */ -#define SCL_WAIT_USEC 1000 - -/* BUF_WAIT is time bus must be free between STOP or ACK and to next START. - * Should be 20, but some chips need more. - */ -#define TWSI_BUF_WAIT_USEC 60 - -static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) -{ - u32 mask; - - udelay(1); - - mask = QSFP_HFI0_I2CCLK; - - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask); - - /* - * Allow for slow slaves by simple - * delay for falling edge, sampling on rise. - */ - if (!bit) { - udelay(2); - } else { - int rise_usec; - - for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { - if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0)) - break; - udelay(2); - } - if (rise_usec <= 0) - dd_dev_err(dd, "SCL interface stuck low > %d uSec\n", - SCL_WAIT_USEC); - } - i2c_wait_for_writes(dd, target); -} - -static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait) -{ - u32 read_val, mask; - - mask = QSFP_HFI0_I2CCLK; - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); - if (wait) - i2c_wait_for_writes(dd, target); - return (read_val & mask) >> GPIO_SCL_NUM; -} - -static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit) -{ - u32 mask; - - mask = QSFP_HFI0_I2CDAT; - - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask); - - i2c_wait_for_writes(dd, target); - udelay(2); -} - -static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait) -{ - u32 read_val, mask; - - mask = QSFP_HFI0_I2CDAT; - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); - if (wait) - i2c_wait_for_writes(dd, target); - return (read_val & mask) >> GPIO_SDA_NUM; -} - -/** - * i2c_ackrcv - see if ack following write is true - * @dd: the hfi1_ib device - */ -static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target) -{ - u8 ack_received; - - /* AT ENTRY SCL = LOW */ - /* change direction, ignore data */ - ack_received = sda_in(dd, target, 1); - scl_out(dd, target, 1); - ack_received = sda_in(dd, target, 1) == 0; - scl_out(dd, target, 0); - return ack_received; -} - -static void stop_cmd(struct hfi1_devdata *dd, u32 target); - -/** - * rd_byte - read a byte, sending STOP on last, else ACK - * @dd: the hfi1_ib device - * - * Returns byte shifted out of device - */ -static int rd_byte(struct hfi1_devdata *dd, u32 target, int last) -{ - int bit_cntr, data; - - data = 0; - - for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { - data <<= 1; - scl_out(dd, target, 1); - data |= sda_in(dd, target, 0); - scl_out(dd, target, 0); - } - if (last) { - scl_out(dd, target, 1); - stop_cmd(dd, target); - } else { - sda_out(dd, target, 0); - scl_out(dd, target, 1); - scl_out(dd, target, 0); - sda_out(dd, target, 1); - } - return data; -} - -/** - * wr_byte - write a byte, one bit at a time - * @dd: the hfi1_ib device - * @data: the byte to write - * - * Returns 0 if we got the following ack, otherwise 1 - */ -static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data) -{ - int bit_cntr; - u8 bit; - - for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { - bit = (data >> bit_cntr) & 1; - sda_out(dd, target, bit); - scl_out(dd, target, 1); - scl_out(dd, target, 0); - } - return (!i2c_ackrcv(dd, target)) ? 1 : 0; -} - -/* - * issue TWSI start sequence: - * (both clock/data high, clock high, data low while clock is high) - */ -static void start_seq(struct hfi1_devdata *dd, u32 target) -{ - sda_out(dd, target, 1); - scl_out(dd, target, 1); - sda_out(dd, target, 0); - udelay(1); - scl_out(dd, target, 0); -} - -/** - * stop_seq - transmit the stop sequence - * @dd: the hfi1_ib device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_seq(struct hfi1_devdata *dd, u32 target) -{ - scl_out(dd, target, 0); - sda_out(dd, target, 0); - scl_out(dd, target, 1); - sda_out(dd, target, 1); -} - -/** - * stop_cmd - transmit the stop condition - * @dd: the hfi1_ib device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_cmd(struct hfi1_devdata *dd, u32 target) -{ - stop_seq(dd, target); - udelay(TWSI_BUF_WAIT_USEC); -} - -/** - * hfi1_twsi_reset - reset I2C communication - * @dd: the hfi1_ib device - * returns 0 if ok, -EIO on error - */ -int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target) -{ - int clock_cycles_left = 9; - u32 mask; - - /* Both SCL and SDA should be high. If not, there - * is something wrong. - */ - mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT; - - /* - * Force pins to desired innocuous state. - * This is the default power-on state with out=0 and dir=0, - * So tri-stated and should be floating high (barring HW problems) - */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - - /* Check if SCL is low, if it is low then we have a slave device - * misbehaving and there is not much we can do. - */ - if (!scl_in(dd, target, 0)) - return -EIO; - - /* Check if SDA is low, if it is low then we have to clock SDA - * up to 9 times for the device to release the bus - */ - while (clock_cycles_left--) { - if (sda_in(dd, target, 0)) - return 0; - scl_out(dd, target, 0); - scl_out(dd, target, 1); - } - - return -EIO; -} - -#define HFI1_TWSI_START 0x100 -#define HFI1_TWSI_STOP 0x200 - -/* Write byte to TWSI, optionally prefixed with START or suffixed with - * STOP. - * returns 0 if OK (ACK received), else != 0 - */ -static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags) -{ - int ret = 1; - - if (flags & HFI1_TWSI_START) - start_seq(dd, target); - - /* Leaves SCL low (from i2c_ackrcv()) */ - ret = wr_byte(dd, target, data); - - if (flags & HFI1_TWSI_STOP) - stop_cmd(dd, target); - return ret; -} - -/* Added functionality for IBA7220-based cards */ -#define HFI1_TEMP_DEV 0x98 - -/* - * hfi1_twsi_blk_rd - * General interface for data transfer from twsi devices. - * One vestige of its former role is that it recognizes a device - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, - * which responded to all TWSI device codes, interpreting them as - * address within device. On all other devices found on board handled by - * this driver, the device is followed by a N-byte "address" which selects - * the "register" or "offset" within the device from which data should - * be read. - */ -int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, - void *buffer, int len) -{ - u8 *bp = buffer; - int ret = 1; - int i; - int offset_size; - - /* obtain the offset size, strip it from the device address */ - offset_size = (dev >> 8) & 0xff; - dev &= 0xff; - - /* allow at most a 2 byte offset */ - if (offset_size > 2) - goto bail; - - if (dev == HFI1_TWSI_NO_DEV) { - /* legacy not-really-I2C */ - addr = (addr << 1) | READ_CMD; - ret = twsi_wr(dd, target, addr, HFI1_TWSI_START); - } else { - /* Actual I2C */ - if (offset_size) { - ret = twsi_wr(dd, target, - dev | WRITE_CMD, HFI1_TWSI_START); - if (ret) { - stop_cmd(dd, target); - goto bail; - } - - for (i = 0; i < offset_size; i++) { - ret = twsi_wr(dd, target, - (addr >> (i * 8)) & 0xff, 0); - udelay(TWSI_BUF_WAIT_USEC); - if (ret) { - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", - i, addr); - goto bail; - } - } - } - ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START); - } - if (ret) { - stop_cmd(dd, target); - goto bail; - } - - /* - * block devices keeps clocking data out as long as we ack, - * automatically incrementing the address. Some have "pages" - * whose boundaries will not be crossed, but the handling - * of these is left to the caller, who is in a better - * position to know. - */ - while (len-- > 0) { - /* - * Get and store data, sending ACK if length remaining, - * else STOP - */ - *bp++ = rd_byte(dd, target, !len); - } - - ret = 0; - -bail: - return ret; -} - -/* - * hfi1_twsi_blk_wr - * General interface for data transfer to twsi devices. - * One vestige of its former role is that it recognizes a device - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, - * which responded to all TWSI device codes, interpreting them as - * address within device. On all other devices found on board handled by - * this driver, the device is followed by a N-byte "address" which selects - * the "register" or "offset" within the device to which data should - * be written. - */ -int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, - const void *buffer, int len) -{ - const u8 *bp = buffer; - int ret = 1; - int i; - int offset_size; - - /* obtain the offset size, strip it from the device address */ - offset_size = (dev >> 8) & 0xff; - dev &= 0xff; - - /* allow at most a 2 byte offset */ - if (offset_size > 2) - goto bail; - - if (dev == HFI1_TWSI_NO_DEV) { - if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD, - HFI1_TWSI_START)) { - goto failed_write; - } - } else { - /* Real I2C */ - if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START)) - goto failed_write; - } - - for (i = 0; i < offset_size; i++) { - ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0); - udelay(TWSI_BUF_WAIT_USEC); - if (ret) { - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", - i, addr); - goto bail; - } - } - - for (i = 0; i < len; i++) - if (twsi_wr(dd, target, *bp++, 0)) - goto failed_write; - - ret = 0; - -failed_write: - stop_cmd(dd, target); - -bail: - return ret; -} diff --git a/drivers/infiniband/hw/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h deleted file mode 100644 index 5b8a5b5e7eae..000000000000 --- a/drivers/infiniband/hw/hfi1/twsi.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _TWSI_H -#define _TWSI_H -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#define HFI1_TWSI_NO_DEV 0xFF - -struct hfi1_devdata; - -/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */ -#define GPIO_SDA_NUM 1 -#define GPIO_SCL_NUM 0 - -/* these functions must be called with qsfp_lock held */ -int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target); -int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, - void *buffer, int len); -int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, - const void *buffer, int len); - -#endif /* _TWSI_H */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index df773d433297..a726d96d185f 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -119,6 +119,31 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; } /* + * Local operations are processed immediately + * after all prior requests have completed. + */ + if (wqe->wr.opcode == IB_WR_REG_MR || + wqe->wr.opcode == IB_WR_LOCAL_INV) { + int local_ops = 0; + int err = 0; + + if (qp->s_last != qp->s_cur) + goto bail; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { + err = rvt_invalidate_rkey( + qp, wqe->wr.ex.invalidate_rkey); + local_ops = 1; + } + hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR + : IB_WC_SUCCESS); + if (local_ops) + atomic_dec(&qp->local_ops_pending); + qp->s_hdrwords = 0; + goto done_free_tx; + } + /* * Start a new request. */ qp->s_psn = wqe->psn; @@ -294,46 +319,12 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - u32 bth1; bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { - if (bth1 & HFI1_BECN_SMASK) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 rqpn, lqpn; - u16 rlid = be16_to_cpu(hdr->lrh[3]); - u8 sl, sc5; - - lqpn = bth1 & RVT_QPN_MASK; - rqpn = qp->remote_qpn; - - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - sl = ibp->sc_to_sl[sc5]; - - process_becn(ppd, sl, rlid, lqpn, rqpn, - IB_CC_SVCTYPE_UC); - } - - if (bth1 & HFI1_FECN_SMASK) { - struct ib_grh *grh = NULL; - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); - u16 slid = be16_to_cpu(hdr->lrh[3]); - u16 dlid = be16_to_cpu(hdr->lrh[1]); - u32 src_qp = qp->remote_qpn; - u8 sc5; - - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - if (has_grh) - grh = &hdr->u.l.grh; - - return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, - grh); - } - } + process_ecn(qp, packet, true); psn = be32_to_cpu(ohdr->bth[2]); opcode = (bth0 >> 24) & 0xff; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index be91f6fa1c87..f01e8e1d62d3 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -184,8 +184,12 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (ah_attr->ah_flags & IB_AH_GRH) { - hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, - sizeof(struct ib_grh), 1, 0); + struct ib_grh grh; + struct ib_global_route grd = ah_attr->grh; + + hfi1_make_grh(ibp, &grh, &grd, 0, 0); + hfi1_copy_sge(&qp->r_sge, &grh, + sizeof(grh), 1, 0); wc.wc_flags |= IB_WC_GRH; } else { hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); @@ -430,10 +434,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ - priv->s_hdr->ahgcount = 0; - priv->s_hdr->ahgidx = 0; - priv->s_hdr->tx_flags = 0; - priv->s_hdr->sde = NULL; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; + priv->s_ahg->tx_flags = 0; /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -665,13 +668,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) struct hfi1_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; - u32 pad; struct ib_wc wc; u32 qkey; u32 src_qp; u16 dlid, pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; @@ -680,52 +683,33 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); u32 bth1; - int is_mcast; - struct ib_grh *grh = NULL; + u8 sl_from_sc, sl; + u16 slid; + u8 extra_bytes; qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; dlid = be16_to_cpu(hdr->lrh[1]); - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & HFI1_BECN_SMASK)) { - /* - * In pre-B0 h/w the CNP_OPCODE is handled via an - * error path. - */ - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - u8 sl; - - sl = ibp->sc_to_sl[sc5]; - - process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD); - } + slid = be16_to_cpu(hdr->lrh[3]); + pkey = (u16)be32_to_cpu(ohdr->bth[0]); + sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + extra_bytes += (SIZE_OF_CRC << 2); + sl_from_sc = ibp->sc_to_sl[sc5]; - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; opcode &= 0xff; - pkey = (u16)be32_to_cpu(ohdr->bth[0]); - - if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) { - u16 slid = be16_to_cpu(hdr->lrh[3]); - - return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh); - } + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by * and drop incomplete packets. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto drop; - tlen -= hdrsize + pad + 4; + tlen -= hdrsize + extra_bytes; /* * Check that the permissive LID is only used on QP0 @@ -736,10 +720,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) hdr->lrh[3] == IB_LID_PERMISSIVE)) goto drop; if (qp->ibqp.qp_num > 1) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid; - - slid = be16_to_cpu(hdr->lrh[3]); if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { /* * Traps will not be sent for packets dropped @@ -748,12 +728,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * IB spec (release 1.3, section 10.9.4) */ hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & - 0xF, + pkey, sl, src_qp, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + slid, dlid); return; } } else { @@ -763,22 +740,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) goto drop; } if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, src_qp, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + slid, dlid); return; } /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && - (tlen > 2048 || - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) + (tlen > 2048 || (sc5 == 0xF)))) goto drop; } else { /* Received on QP0, and so by definition, this is an SMP */ struct opa_smp *smp = (struct opa_smp *)data; - u16 slid = be16_to_cpu(hdr->lrh[3]); if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp)) goto drop; @@ -861,7 +834,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->ibqp.qp_type == IB_QPT_SMI) { if (mgmt_pkey_idx < 0) { if (net_ratelimit()) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_devdata *dd = ppd->dd; dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n", @@ -874,8 +846,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) wc.pkey_index = 0; } - wc.slid = be16_to_cpu(hdr->lrh[3]); - wc.sl = ibp->sc_to_sl[sc5]; + wc.slid = slid; + wc.sl = sl_from_sc; /* * Save the LMC lower bits if the destination LID is a unicast LID. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1b640a35b3fe..64d26525435a 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -82,24 +82,25 @@ struct tid_pageset { ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct rb_root *); + struct hfi1_filedata *); static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); -static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); -static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); +static int tid_rb_insert(void *, struct mmu_rb_node *); +static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + struct tid_rb_node *tnode); +static void tid_rb_remove(void *, struct mmu_rb_node *); +static int tid_rb_invalidate(void *, struct mmu_rb_node *); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, u32 *, unsigned *, unsigned *); static int unprogram_rcvarray(struct file *, u32, struct tid_group **); -static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static struct mmu_rb_ops tid_rb_ops = { - .insert = mmu_rb_insert, - .remove = mmu_rb_remove, - .invalidate = mmu_rb_invalidate + .insert = tid_rb_insert, + .remove = tid_rb_remove, + .invalidate = tid_rb_invalidate }; static inline u32 rcventry2tidinfo(u32 rcventry) @@ -162,7 +163,6 @@ int hfi1_user_exp_rcv_init(struct file *fp) spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - fd->tid_rb_root = RB_ROOT; if (!uctxt->subctxt_cnt || !fd->subctxt) { exp_tid_group_init(&uctxt->tid_group_list); @@ -197,7 +197,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) if (!fd->entry_to_rb) return -ENOMEM; - if (!HFI1_CAP_IS_USET(TID_UNMAP)) { + if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { fd->invalid_tid_idx = 0; fd->invalid_tids = kzalloc(uctxt->expected_count * sizeof(u32), GFP_KERNEL); @@ -208,15 +208,15 @@ int hfi1_user_exp_rcv_init(struct file *fp) /* * Register MMU notifier callbacks. If the registration - * fails, continue but turn off the TID caching for - * all user contexts. + * fails, continue without TID caching for this context. */ - ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops); + ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, + dd->pport->hfi1_wq, + &fd->handler); if (ret) { dd_dev_info(dd, "Failed MMU notifier registration %d\n", ret); - HFI1_CAP_USET(TID_UNMAP); ret = 0; } } @@ -235,7 +235,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) * init. */ spin_lock(&fd->tid_lock); - if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { + if (uctxt->subctxt_cnt && fd->handler) { u16 remainder; fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; @@ -261,18 +261,16 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) * The notifier would have been removed when the process'es mm * was freed. */ - if (!HFI1_CAP_IS_USET(TID_UNMAP)) - hfi1_mmu_rb_unregister(&fd->tid_rb_root); + if (fd->handler) + hfi1_mmu_rb_unregister(fd->handler); kfree(fd->invalid_tids); if (!uctxt->cnt) { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) - unlock_exp_tids(uctxt, &uctxt->tid_full_list, - &fd->tid_rb_root); + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) - unlock_exp_tids(uctxt, &uctxt->tid_used_list, - &fd->tid_rb_root); + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, list) { list_del_init(&grp->list); @@ -399,12 +397,12 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) * pages, accept the amount pinned so far and program only that. * User space knows how to deal with partially programmed buffers. */ - if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) { + if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { ret = -ENOMEM; goto bail; } - pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); + pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); if (pinned <= 0) { ret = pinned; goto bail; @@ -559,7 +557,7 @@ nomem: * for example), unpin all unmapped pages so we can pin them nex time. */ if (mapped_pages != pinned) { - hfi1_release_user_pages(current->mm, &pages[mapped_pages], + hfi1_release_user_pages(fd->mm, &pages[mapped_pages], pinned - mapped_pages, false); fd->tid_n_pinned -= pinned - mapped_pages; @@ -829,7 +827,6 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; - struct rb_root *root = &fd->tid_rb_root; dma_addr_t phys; /* @@ -861,10 +858,10 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - if (HFI1_CAP_IS_USET(TID_UNMAP)) - ret = mmu_rb_insert(root, &node->mmu); + if (!fd->handler) + ret = tid_rb_insert(fd, &node->mmu); else - ret = hfi1_mmu_rb_insert(root, &node->mmu); + ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); if (ret) { hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", @@ -904,19 +901,19 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, node = fd->entry_to_rb[rcventry]; if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); - else - hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); if (grp) *grp = node->grp; - clear_tid_node(fd, fd->subctxt, node); + + if (!fd->handler) + cacheless_tid_rb_remove(fd, node); + else + hfi1_mmu_rb_remove(fd->handler, &node->mmu); + return 0; } -static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, - struct tid_rb_node *node) +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -934,7 +931,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(current->mm, node->pages, node->npages, true); + hfi1_release_user_pages(fd->mm, node->pages, node->npages, true); fd->tid_n_pinned -= node->npages; node->grp->used--; @@ -949,12 +946,15 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, kfree(node); } +/* + * As a simple helper for hfi1_user_exp_rcv_free, this function deals with + * clearing nodes in the non-cached case. + */ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, - struct exp_tid_set *set, struct rb_root *root) + struct exp_tid_set *set, + struct hfi1_filedata *fd) { struct tid_group *grp, *ptr; - struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata, - tid_rb_root); int i; list_for_each_entry_safe(grp, ptr, &set->list, list) { @@ -969,22 +969,23 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, uctxt->expected_base]; if (!node || node->rcventry != rcventry) continue; - if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, - &node->mmu, NULL); - else - hfi1_mmu_rb_remove(&fd->tid_rb_root, - &node->mmu); - clear_tid_node(fd, -1, node); + + cacheless_tid_rb_remove(fd, node); } } } } -static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) +/* + * Always return 0 from this function. A non-zero return indicates that the + * remove operation will be called and that memory should be unpinned. + * However, the driver cannot unpin out from under PSM. Instead, retain the + * memory (by returning 0) and inform PSM that the memory is going away. PSM + * will call back later when it has removed the memory from its list. + */ +static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); + struct hfi1_filedata *fdata = arg; struct hfi1_ctxtdata *uctxt = fdata->uctxt; struct tid_rb_node *node = container_of(mnode, struct tid_rb_node, mmu); @@ -1025,10 +1026,9 @@ static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) return 0; } -static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) +static int tid_rb_insert(void *arg, struct mmu_rb_node *node) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); + struct hfi1_filedata *fdata = arg; struct tid_rb_node *tnode = container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; @@ -1037,14 +1037,20 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) return 0; } -static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, - struct mm_struct *mm) +static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + struct tid_rb_node *tnode) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; fdata->entry_to_rb[tnode->rcventry - base] = NULL; + clear_tid_node(fdata, tnode); +} + +static void tid_rb_remove(void *arg, struct mmu_rb_node *node) +{ + struct hfi1_filedata *fdata = arg; + struct tid_rb_node *tnode = + container_of(node, struct tid_rb_node, mmu); + + cacheless_tid_rb_remove(fdata, tnode); } diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 88e10b5f55f1..20f4ddcac3b0 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -68,7 +68,8 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)"); * could keeping caching buffers. * */ -bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) +bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, + u32 nlocked, u32 npages) { unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, size = (cache_size * (1UL << 20)); /* convert to bytes */ @@ -89,9 +90,9 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) /* Convert to number of pages */ size = DIV_ROUND_UP(size, PAGE_SIZE); - down_read(¤t->mm->mmap_sem); - pinned = current->mm->pinned_vm; - up_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); + pinned = mm->pinned_vm; + up_read(&mm->mmap_sem); /* First, check the absolute limit against all pinned pages. */ if (pinned + npages >= ulimit && !can_lock) @@ -100,8 +101,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) return ((nlocked + npages) <= size) || can_lock; } -int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, - struct page **pages) +int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages, + bool writable, struct page **pages) { int ret; @@ -109,9 +110,9 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, if (ret < 0) return ret; - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm += ret; - up_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); + mm->pinned_vm += ret; + up_write(&mm->mmap_sem); return ret; } diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 47ffd273ecbd..0ecf27903dc2 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Last packet in the request */ #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) -#define SDMA_REQ_IN_USE 0 +/* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 #define SDMA_REQ_SEND_DONE 2 #define SDMA_REQ_HAVE_AHG 3 @@ -183,16 +183,18 @@ struct user_sdma_iovec { struct sdma_mmu_node *node; }; -#define SDMA_CACHE_NODE_EVICT 0 - struct sdma_mmu_node { struct mmu_rb_node rb; - struct list_head list; struct hfi1_user_sdma_pkt_q *pq; atomic_t refcount; struct page **pages; unsigned npages; - unsigned long flags; +}; + +/* evict operation argument */ +struct evict_data { + u32 cleared; /* count evicted so far */ + u32 target; /* target count to evict */ }; struct user_sdma_request { @@ -305,14 +307,16 @@ static int defer_packet_queue( unsigned seq); static void activate_packet_queue(struct iowait *, int); static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); -static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); -static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); +static int sdma_rb_insert(void *, struct mmu_rb_node *); +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *arg2, bool *stop); +static void sdma_rb_remove(void *, struct mmu_rb_node *); +static int sdma_rb_invalidate(void *, struct mmu_rb_node *); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, .insert = sdma_rb_insert, + .evict = sdma_rb_evict, .remove = sdma_rb_remove, .invalidate = sdma_rb_invalidate }; @@ -397,6 +401,11 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) if (!pq->reqs) goto pq_reqs_nomem; + memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long); + pq->req_in_use = kzalloc(memsize, GFP_KERNEL); + if (!pq->req_in_use) + goto pq_reqs_no_in_use; + INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; @@ -405,9 +414,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); - pq->sdma_rb_root = RB_ROOT; - INIT_LIST_HEAD(&pq->evict); - spin_lock_init(&pq->evict_lock); + atomic_set(&pq->n_locked, 0); + pq->mm = fd->mm; iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); @@ -437,7 +445,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) cq->nentries = hfi1_sdma_comp_ring_size; fd->cq = cq; - ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops); + ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, + &pq->handler); if (ret) { dd_dev_err(dd, "Failed to register with MMU %d", ret); goto done; @@ -453,6 +462,8 @@ cq_comps_nomem: cq_nomem: kmem_cache_destroy(pq->txreq_cache); pq_txreq_nomem: + kfree(pq->req_in_use); +pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: kfree(pq); @@ -472,8 +483,9 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); pq = fd->pq; - hfi1_mmu_rb_unregister(&pq->sdma_rb_root); if (pq) { + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); spin_lock_irqsave(&uctxt->sdma_qlock, flags); if (!list_empty(&pq->list)) list_del_init(&pq->list); @@ -484,6 +496,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) pq->wait, (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); kfree(pq->reqs); + kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); fd->pq = NULL; @@ -496,10 +509,31 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) return 0; } +static u8 dlid_to_selector(u16 dlid) +{ + static u8 mapping[256]; + static int initialized; + static u8 next; + int hash; + + if (!initialized) { + memset(mapping, 0xFF, 256); + initialized = 1; + } + + hash = ((dlid >> 8) ^ dlid) & 0xFF; + if (mapping[hash] == 0xFF) { + mapping[hash] = next; + next = (next + 1) & 0x7F; + } + + return mapping[hash]; +} + int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, unsigned long dim, unsigned long *count) { - int ret = 0, i = 0; + int ret = 0, i; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq = fd->pq; @@ -511,6 +545,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct user_sdma_request *req; u8 opcode, sc, vl; int req_queued = 0; + u16 dlid; + u8 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -529,30 +565,48 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (cq->comps[info.comp_idx].status == QUEUED || - test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) { - hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state", - dd->unit, uctxt->ctxt, fd->subctxt, - info.comp_idx); - return -EBADSLT; + + if (info.comp_idx >= hfi1_sdma_comp_ring_size) { + hfi1_cdbg(SDMA, + "[%u:%u:%u:%u] Invalid comp index", + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); + return -EINVAL; } + + /* + * Sanity check the header io vector count. Need at least 1 vector + * (header) and cannot be larger than the actual io vector count. + */ + if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) { + hfi1_cdbg(SDMA, + "[%u:%u:%u:%u] Invalid iov count %d, dim %ld", + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx, + req_iovcnt(info.ctrl), dim); + return -EINVAL; + } + if (!info.fragsize) { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Request does not specify fragsize", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); return -EINVAL; } + + /* Try to claim the request. */ + if (test_and_set_bit(info.comp_idx, pq->req_in_use)) { + hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use", + dd->unit, uctxt->ctxt, fd->subctxt, + info.comp_idx); + return -EBADSLT; + } /* - * We've done all the safety checks that we can up to this point, - * "allocate" the request entry. + * All safety checks have been done and this request has been claimed. */ hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; memset(req, 0, sizeof(*req)); - /* Mark the request as IN_USE before we start filling it in. */ - set_bit(SDMA_REQ_IN_USE, &req->flags); - req->data_iovs = req_iovcnt(info.ctrl) - 1; + req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ req->pq = pq; req->cq = cq; req->status = -1; @@ -560,13 +614,22 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, memcpy(&req->info, &info, sizeof(info)); - if (req_opcode(info.ctrl) == EXPECTED) + if (req_opcode(info.ctrl) == EXPECTED) { + /* expected must have a TID info and at least one data vector */ + if (req->data_iovs < 2) { + SDMA_DBG(req, + "Not enough vectors for expected request"); + ret = -EINVAL; + goto free_req; + } req->data_iovs--; + } if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) { SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs, MAX_VECTORS_PER_REQ); - return -EINVAL; + ret = -EINVAL; + goto free_req; } /* Copy the header from the user buffer */ ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info), @@ -634,7 +697,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, idx++; /* Save all the IO vector structures */ - while (i < req->data_iovs) { + for (i = 0; i < req->data_iovs; i++) { INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); ret = pin_vector_pages(req, &req->iovs[i]); @@ -642,7 +705,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, req->status = ret; goto free_req; } - req->data_len += req->iovs[i++].iov.iov_len; + req->data_len += req->iovs[i].iov.iov_len; } SDMA_DBG(req, "total data length %u", req->data_len); @@ -686,9 +749,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, idx++; } + dlid = be16_to_cpu(req->hdr.lrh[1]); + selector = dlid_to_selector(dlid); + /* Have to select the engine */ req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt), + (u32)(uctxt->ctxt + fd->subctxt + + selector), vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; @@ -766,14 +833,21 @@ static inline u32 compute_data_length(struct user_sdma_request *req, * The size of the data of the first packet is in the header * template. However, it includes the header and ICRC, which need * to be subtracted. + * The minimum representable packet data length in a header is 4 bytes, + * therefore, when the data length request is less than 4 bytes, there's + * only one packet, and the packet data length is equal to that of the + * request data length. * The size of the remaining packets is the minimum of the frag * size (MTU) or remaining data in the request. */ u32 len; if (!req->seqnum) { - len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - - (sizeof(tx->hdr) - 4)); + if (req->data_len < sizeof(u32)) + len = req->data_len; + else + len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - + (sizeof(tx->hdr) - 4)); } else if (req_opcode(req->info.ctrl) == EXPECTED) { u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * PAGE_SIZE; @@ -803,6 +877,13 @@ static inline u32 compute_data_length(struct user_sdma_request *req, return len; } +static inline u32 pad_len(u32 len) +{ + if (len & (sizeof(u32) - 1)) + len += sizeof(u32) - (len & (sizeof(u32) - 1)); + return len; +} + static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) { /* (Size of complete header - size of PBC) + 4B ICRC + data length */ @@ -894,7 +975,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) { if (!req->seqnum) { u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); - u32 lrhlen = get_lrh_len(req->hdr, datalen); + u32 lrhlen = get_lrh_len(req->hdr, + pad_len(datalen)); /* * Copy the request header into the tx header * because the HW needs a cacheline-aligned @@ -1048,39 +1130,24 @@ static inline int num_user_pages(const struct iovec *iov) static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { - u32 cleared = 0; - struct sdma_mmu_node *node, *ptr; - struct list_head to_evict = LIST_HEAD_INIT(to_evict); - - spin_lock(&pq->evict_lock); - list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { - /* Make sure that no one is still using the node. */ - if (!atomic_read(&node->refcount)) { - set_bit(SDMA_CACHE_NODE_EVICT, &node->flags); - list_del_init(&node->list); - list_add(&node->list, &to_evict); - cleared += node->npages; - if (cleared >= npages) - break; - } - } - spin_unlock(&pq->evict_lock); - - list_for_each_entry_safe(node, ptr, &to_evict, list) - hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); + struct evict_data evict_data; - return cleared; + evict_data.cleared = 0; + evict_data.target = npages; + hfi1_mmu_rb_evict(pq->handler, &evict_data); + return evict_data.cleared; } static int pin_vector_pages(struct user_sdma_request *req, - struct user_sdma_iovec *iovec) { + struct user_sdma_iovec *iovec) +{ int ret = 0, pinned, npages, cleared; struct page **pages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root, + rb_node = hfi1_mmu_rb_extract(pq->handler, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); if (rb_node && !IS_ERR(rb_node)) @@ -1096,7 +1163,6 @@ static int pin_vector_pages(struct user_sdma_request *req, node->rb.addr = (unsigned long)iovec->iov.iov_base; node->pq = pq; atomic_set(&node->refcount, 0); - INIT_LIST_HEAD(&node->list); } npages = num_user_pages(&iovec->iov); @@ -1111,28 +1177,14 @@ static int pin_vector_pages(struct user_sdma_request *req, npages -= node->npages; - /* - * If rb_node is NULL, it means that this is brand new node - * and, therefore not on the eviction list. - * If, however, the rb_node is non-NULL, it means that the - * node is already in RB tree and, therefore on the eviction - * list (nodes are unconditionally inserted in the eviction - * list). In that case, we have to remove the node prior to - * calling the eviction function in order to prevent it from - * freeing this node. - */ - if (rb_node) { - spin_lock(&pq->evict_lock); - list_del_init(&node->list); - spin_unlock(&pq->evict_lock); - } retry: - if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { + if (!hfi1_can_pin_pages(pq->dd, pq->mm, + atomic_read(&pq->n_locked), npages)) { cleared = sdma_cache_evict(pq, npages); if (cleared >= npages) goto retry; } - pinned = hfi1_acquire_user_pages( + pinned = hfi1_acquire_user_pages(pq->mm, ((unsigned long)iovec->iov.iov_base + (node->npages * PAGE_SIZE)), npages, 0, pages + node->npages); @@ -1142,7 +1194,7 @@ retry: goto bail; } if (pinned != npages) { - unpin_vector_pages(current->mm, pages, node->npages, + unpin_vector_pages(pq->mm, pages, node->npages, pinned); ret = -EFAULT; goto bail; @@ -1152,28 +1204,22 @@ retry: node->pages = pages; node->npages += pinned; npages = node->npages; - spin_lock(&pq->evict_lock); - list_add(&node->list, &pq->evict); - pq->n_locked += pinned; - spin_unlock(&pq->evict_lock); + atomic_add(pinned, &pq->n_locked); } iovec->pages = node->pages; iovec->npages = npages; iovec->node = node; - ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); + ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); if (ret) { - spin_lock(&pq->evict_lock); - if (!list_empty(&node->list)) - list_del(&node->list); - pq->n_locked -= node->npages; - spin_unlock(&pq->evict_lock); + atomic_sub(node->npages, &pq->n_locked); + iovec->node = NULL; goto bail; } return 0; bail: if (rb_node) - unpin_vector_pages(current->mm, node->pages, 0, node->npages); + unpin_vector_pages(pq->mm, node->pages, 0, node->npages); kfree(node); return ret; } @@ -1181,7 +1227,7 @@ bail: static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, unsigned start, unsigned npages) { - hfi1_release_user_pages(mm, pages + start, npages, 0); + hfi1_release_user_pages(mm, pages + start, npages, false); kfree(pages); } @@ -1192,16 +1238,14 @@ static int check_header_template(struct user_sdma_request *req, /* * Perform safety checks for any type of packet: * - transfer size is multiple of 64bytes - * - packet length is multiple of 4bytes - * - entire request length is multiple of 4bytes + * - packet length is multiple of 4 bytes * - packet length is not larger than MTU size * * These checks are only done for the first packet of the * transfer since the header is "given" to us by user space. * For the remainder of the packets we compute the values. */ - if (req->info.fragsize % PIO_BLOCK_SIZE || - lrhlen & 0x3 || req->data_len & 0x3 || + if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 || lrhlen > get_lrh_len(*hdr, req->info.fragsize)) return -EINVAL; @@ -1263,7 +1307,7 @@ static int set_txreq_header(struct user_sdma_request *req, struct hfi1_pkt_header *hdr = &tx->hdr; u16 pbclen; int ret; - u32 tidval = 0, lrhlen = get_lrh_len(*hdr, datalen); + u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); /* Copy the header template to the request before modification */ memcpy(hdr, &req->hdr, sizeof(*hdr)); @@ -1374,7 +1418,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, len); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ @@ -1534,14 +1578,14 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) continue; if (unpin) - hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, + hfi1_mmu_rb_remove(req->pq->handler, &node->rb); else atomic_dec(&node->refcount); } } kfree(req->tids); - clear_bit(SDMA_REQ_IN_USE, &req->flags); + clear_bit(req->info.comp_idx, req->pq->req_in_use); } static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, @@ -1564,7 +1608,7 @@ static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, return (bool)(node->addr == addr); } -static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); @@ -1573,48 +1617,45 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) return 0; } -static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, - struct mm_struct *mm) +/* + * Return 1 to remove the node from the rb tree and call the remove op. + * + * Called with the rb tree lock held. + */ +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + struct evict_data *evict_data = evict_arg; + + /* is this node still being used? */ + if (atomic_read(&node->refcount)) + return 0; /* keep this node */ + + /* this node will be evicted, add its pages to our count */ + evict_data->cleared += node->npages; + + /* have enough pages been cleared? */ + if (evict_data->cleared >= evict_data->target) + *stop = true; + + return 1; /* remove this node */ +} + +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - spin_lock(&node->pq->evict_lock); - /* - * We've been called by the MMU notifier but this node has been - * scheduled for eviction. The eviction function will take care - * of freeing this node. - * We have to take the above lock first because we are racing - * against the setting of the bit in the eviction function. - */ - if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) { - spin_unlock(&node->pq->evict_lock); - return; - } + atomic_sub(node->npages, &node->pq->n_locked); - if (!list_empty(&node->list)) - list_del(&node->list); - node->pq->n_locked -= node->npages; - spin_unlock(&node->pq->evict_lock); + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); - /* - * If mm is set, we are being called by the MMU notifier and we - * should not pass a mm_struct to unpin_vector_page(). This is to - * prevent a deadlock when hfi1_release_user_pages() attempts to - * take the mmap_sem, which the MMU notifier has already taken. - */ - unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, - node->npages); - /* - * If called by the MMU notifier, we have to adjust the pinned - * page count ourselves. - */ - if (mm) - mm->pinned_vm -= node->npages; kfree(node); } -static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) +static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index b9240e351161..39001714f551 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -63,14 +63,14 @@ struct hfi1_user_sdma_pkt_q { struct hfi1_devdata *dd; struct kmem_cache *txreq_cache; struct user_sdma_request *reqs; + unsigned long *req_in_use; struct iowait busy; unsigned state; wait_queue_head_t wait; unsigned long unpinned; - struct rb_root sdma_rb_root; - u32 n_locked; - struct list_head evict; - spinlock_t evict_lock; /* protect evict and n_locked */ + struct mmu_rb_handler *handler; + atomic_t n_locked; + struct mm_struct *mm; }; struct hfi1_user_sdma_comp_q { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 849c4b9399d4..2b359540901d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -306,7 +306,10 @@ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR }; /* @@ -378,6 +381,8 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv, [IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv, [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, @@ -540,19 +545,15 @@ void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline int qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) { - struct hfi1_ibport *ibp; - if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) - goto dropit; + return NULL; if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || (opcode == IB_OPCODE_CNP)) - return 1; -dropit: - ibp = &packet->rcd->ppd->ibport_data; - ibp->rvp.n_pkt_drops++; - return 0; + return opcode_handler_tbl[opcode]; + + return NULL; } /** @@ -571,6 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler packet_handler; unsigned long flags; u32 qp_num; int lnh; @@ -616,8 +618,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - if (likely((qp_ok(opcode, packet)))) - opcode_handler_tbl[opcode](packet); + packet_handler = qp_ok(opcode, packet); + if (likely(packet_handler)) + packet_handler(packet); + else + ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } /* @@ -634,8 +639,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - if (likely((qp_ok(opcode, packet)))) - opcode_handler_tbl[opcode](packet); + packet_handler = qp_ok(opcode, packet); + if (likely(packet_handler)) + packet_handler(packet); + else + ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); rcu_read_unlock(); } @@ -808,19 +816,19 @@ static int build_verbs_tx_desc( struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, - struct ahg_ib_header *ahdr, + struct hfi1_ahg_info *ahg_info, u64 pbc) { int ret = 0; - struct hfi1_pio_header *phdr = &tx->phdr; + struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; - if (!ahdr->ahgcount) { + if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( &tx->txreq, - ahdr->tx_flags, + ahg_info->tx_flags, hdrbytes + length, - ahdr->ahgidx, + ahg_info->ahgidx, 0, NULL, 0, @@ -838,11 +846,11 @@ static int build_verbs_tx_desc( } else { ret = sdma_txinit_ahg( &tx->txreq, - ahdr->tx_flags, + ahg_info->tx_flags, length, - ahdr->ahgidx, - ahdr->ahgcount, - ahdr->ahgdesc, + ahg_info->ahgidx, + ahg_info->ahgcount, + ahg_info->ahgdesc, hdrbytes, verbs_sdma_complete); if (ret) @@ -860,7 +868,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; - struct ahg_ib_header *ahdr = priv->s_hdr; + struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; @@ -888,7 +896,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); + ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -1291,19 +1299,24 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + u16 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | + (u64)dc8051_ver_min(ver); rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | + IB_DEVICE_MEM_MGT_EXTENSIONS; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; rdi->dparms.props.hw_ver = dd->minrev; rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; - rdi->dparms.props.max_mr_size = ~0ULL; + rdi->dparms.props.max_mr_size = U64_MAX; + rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; rdi->dparms.props.max_sge = hfi1_max_sges; @@ -1567,6 +1580,17 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct hfi1_ibdev *dev = dev_from_rdi(rdi); + u16 ver = dd_from_dev(dev)->dc8051_ver; + + snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver)); +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1613,6 +1637,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; + ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; strncpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1680,6 +1705,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); + /* post send table */ + dd->verbs_dev.rdi.post_parms = hfi1_post_parms; + ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) rvt_init_port(&dd->verbs_dev.rdi, @@ -1730,8 +1758,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; - u8 sl, sc5, sc4_bit, svc_type; - bool sc4_set = has_sc4_bit(packet); + u8 sl, sc5, svc_type; switch (packet->qp->ibqp.qp_type) { case IB_QPT_UC: @@ -1754,9 +1781,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc4_bit = sc4_set << 4; - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; + sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 488356775627..d1b101c54828 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -178,16 +178,14 @@ struct hfi1_ib_header { } u; } __packed; -struct ahg_ib_header { - struct sdma_engine *sde; +struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; u8 ahgcount; u8 ahgidx; - struct hfi1_ib_header ibh; }; -struct hfi1_pio_header { +struct hfi1_sdma_header { __le64 pbc; struct hfi1_ib_header hdr; } __packed; @@ -197,7 +195,7 @@ struct hfi1_pio_header { * pair is made common */ struct hfi1_qp_priv { - struct ahg_ib_header *s_hdr; /* next header to send */ + struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ u8 s_sc; /* SC[0..4] for next packet */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index a1d6e0807f97..5660897593ba 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -56,7 +56,7 @@ #include "iowait.h" struct verbs_txreq { - struct hfi1_pio_header phdr; + struct hfi1_sdma_header phdr; struct sdma_txreq txreq; struct rvt_qp *qp; struct rvt_swqe *wqe; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index d2fa72516960..5026dc79978a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1567,12 +1567,12 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( ret = i40iw_manage_qhash(iwdev, cm_info, I40IW_QHASH_TYPE_TCP_SYN, I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false); - kfree(child_listen_node); - cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n", child_listen_node); + kfree(child_listen_node); + cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; } spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index bd942da91a27..2fac1db0e0a0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1557,6 +1557,9 @@ enum i40iw_alignment { #define I40IW_RING_MOVE_TAIL(_ring) \ (_ring).tail = ((_ring).tail + 1) % (_ring).size +#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \ + (_ring).head = ((_ring).head + 1) % (_ring).size + #define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index e9c6e82af9c7..c62d354f7810 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -1025,6 +1025,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, u16 txoffset, bufoffset; buf = i40iw_puda_get_listbuf(pbufl); + if (!buf) + return; nextseqnum = buf->seqnum + fpdu_len; txbuf->totallen = buf->hdrlen + fpdu_len; txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen; @@ -1048,6 +1050,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, fpdu_len -= buf->datalen; i40iw_puda_ret_bufpool(ieq, buf); buf = i40iw_puda_get_listbuf(pbufl); + if (!buf) + return; bufoffset = (u16)(buf->data - (u8 *)buf->mem.va); } while (1); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 16cc61720b53..2b1a04e9ca3c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -667,7 +667,7 @@ struct i40iw_tcp_offload_info { bool time_stamp; u8 cwnd_inc_limit; bool drop_ooo_seg; - bool dup_ack_thresh; + u8 dup_ack_thresh; u8 ttl; u8 src_mac_addr_idx; bool avoid_stretch_ack; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index e35faea88c13..4d28c3cb03cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -291,9 +291,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, op_info->lo_sg_list); - for (i = 1; i < op_info->num_lo_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) { i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -401,9 +401,9 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, op_info->sg_list); - for (i = 1; i < op_info->num_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < op_info->num_sges; i++) { i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -685,9 +685,9 @@ static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, info->sg_list); - for (i = 1; i < info->num_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < info->num_sges; i++) { i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -753,8 +753,7 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, - struct i40iw_cq_poll_info *info, - bool post_cq) + struct i40iw_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3, wqe_qword; u64 *cqe, *sw_wqe; @@ -762,7 +761,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_ring *pring = NULL; u32 wqe_idx, q_type, array_idx = 0; enum i40iw_status_code ret_code = 0; - enum i40iw_status_code ret_code2 = 0; bool move_cq_head = true; u8 polarity; u8 addl_wqes = 0; @@ -870,19 +868,14 @@ exit: move_cq_head = false; if (move_cq_head) { - I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2); - - if (ret_code2 && !ret_code) - ret_code = ret_code2; + I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0) cq->polarity ^= 1; - if (post_cq) { - I40IW_RING_MOVE_TAIL(cq->cq_ring); - set_64bit_val(cq->shadow_area, 0, - I40IW_RING_GETCURRENT_HEAD(cq->cq_ring)); - } + I40IW_RING_MOVE_TAIL(cq->cq_ring); + set_64bit_val(cq->shadow_area, 0, + I40IW_RING_GETCURRENT_HEAD(cq->cq_ring)); } else { if (info->is_srq) return ret_code; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 4627646fe8cd..276bcefffd7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -327,7 +327,7 @@ struct i40iw_cq_ops { void (*iw_cq_request_notification)(struct i40iw_cq_uk *, enum i40iw_completion_notify); enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *, - struct i40iw_cq_poll_info *, bool); + struct i40iw_cq_poll_info *); enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count); void (*iw_cq_clean)(void *, struct i40iw_cq_uk *); }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 283b64c942ee..2360338877bf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -529,7 +529,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift); if (status) - return -ENOSYS; + return -ENOMEM; sqdepth = sq_size << sqshift; rqdepth = rq_size << rqshift; @@ -671,7 +671,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; if (init_attr->qp_type != IB_QPT_RC) { - err_code = -ENOSYS; + err_code = -EINVAL; goto error; } if (iwdev->push_mode) @@ -1840,6 +1840,7 @@ struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd, iwmr->ibmr.lkey = stag; iwmr->page_cnt = 1; iwmr->pgaddrmem[0] = addr; + iwmr->length = size; status = i40iw_hwreg_mr(iwdev, iwmr, access); if (status) { i40iw_free_stag(iwdev, stag); @@ -1863,7 +1864,7 @@ static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc) { u64 kva = 0; - return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); + return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva); } /** @@ -1975,18 +1976,6 @@ static ssize_t i40iw_show_rev(struct device *dev, } /** - * i40iw_show_fw_ver - */ -static ssize_t i40iw_show_fw_ver(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u32 firmware_version = I40IW_FW_VERSION; - - return sprintf(buf, "%u.%u\n", firmware_version, - (firmware_version & 0x000000ff)); -} - -/** * i40iw_show_hca */ static ssize_t i40iw_show_hca(struct device *dev, @@ -2006,13 +1995,11 @@ static ssize_t i40iw_show_board(struct device *dev, } static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); static struct device_attribute *i40iw_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -2091,8 +2078,12 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); } - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_RDMA_WRITE: info.op_type = I40IW_OP_TYPE_RDMA_WRITE; @@ -2113,8 +2104,12 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = ukqp->ops.iw_rdma_write(ukqp, &info, false); } - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; @@ -2132,15 +2127,19 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false); - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_LOCAL_INV: info.op_type = I40IW_OP_TYPE_INV_STAG; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true); if (ret) - err = -EIO; + err = -ENOMEM; break; case IB_WR_REG_MR: { @@ -2174,7 +2173,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); if (ret) - err = -EIO; + err = -ENOMEM; break; } default: @@ -2214,6 +2213,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp, struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT]; enum i40iw_status_code ret = 0; unsigned long flags; + int err = 0; iwqp = (struct i40iw_qp *)ibqp; ukqp = &iwqp->sc_qp.qp_uk; @@ -2228,6 +2228,10 @@ static int i40iw_post_recv(struct ib_qp *ibqp, ret = ukqp->ops.iw_post_receive(ukqp, &post_recv); if (ret) { i40iw_pr_err(" post_recv err %d\n", ret); + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; *bad_wr = ib_wr; goto out; } @@ -2235,9 +2239,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp, } out: spin_unlock_irqrestore(&iwqp->lock, flags); - if (ret) - return -ENOSYS; - return 0; + return err; } /** @@ -2264,7 +2266,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, spin_lock_irqsave(&iwcq->lock, flags); while (cqe_count < num_entries) { - ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); + ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info); if (ret == I40IW_ERR_QUEUE_EMPTY) { break; } else if (ret == I40IW_ERR_QUEUE_DESTROYED) { @@ -2437,6 +2439,15 @@ static const char * const i40iw_hw_stat_names[] = { "iwRdmaInv" }; +static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + u32 firmware_version = I40IW_FW_VERSION; + + snprintf(str, str_len, "%u.%u", firmware_version, + (firmware_version & 0x000000ff)); +} + /** * i40iw_alloc_hw_stats - Allocate a hw stats structure * @ibdev: device pointer from stack @@ -2528,7 +2539,7 @@ static int i40iw_modify_port(struct ib_device *ibdev, int port_modify_mask, struct ib_port_modify *props) { - return 0; + return -ENOSYS; } /** @@ -2660,6 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; + iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; iwibdev->ibdev.poll_cq = i40iw_poll_cq; iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; iwibdev->ibdev.post_send = i40iw_post_send; @@ -2723,7 +2735,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) iwdev->iwibdev = i40iw_init_rdma_device(iwdev); if (!iwdev->iwibdev) - return -ENOSYS; + return -ENOMEM; iwibdev = iwdev->iwibdev; ret = ib_register_device(&iwibdev->ibdev, NULL); @@ -2748,5 +2760,5 @@ error: kfree(iwdev->iwibdev->ibdev.iwcm); iwdev->iwibdev->ibdev.iwcm = NULL; ib_dealloc_device(&iwdev->iwibdev->ibdev); - return -ENOSYS; + return ret; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 9f8b516eb2b0..d6fc8a6e8c33 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -288,7 +288,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, if (cq->resize_buf) return -EBUSY; - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); if (!cq->resize_buf) return -ENOMEM; @@ -316,7 +316,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return -EFAULT; - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); if (!cq->resize_buf) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 42a46078d7d5..2af44c2de262 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2025,16 +2025,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), - (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, - (int) dev->dev->caps.fw_ver & 0xffff); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -2053,17 +2043,204 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; +struct diag_counter { + const char *name; + u32 offset; +}; + +#define DIAG_COUNTER(_name, _offset) \ + { .name = #_name, .offset = _offset } + +static const struct diag_counter diag_basic[] = { + DIAG_COUNTER(rq_num_lle, 0x00), + DIAG_COUNTER(sq_num_lle, 0x04), + DIAG_COUNTER(rq_num_lqpoe, 0x08), + DIAG_COUNTER(sq_num_lqpoe, 0x0C), + DIAG_COUNTER(rq_num_lpe, 0x18), + DIAG_COUNTER(sq_num_lpe, 0x1C), + DIAG_COUNTER(rq_num_wrfe, 0x20), + DIAG_COUNTER(sq_num_wrfe, 0x24), + DIAG_COUNTER(sq_num_mwbe, 0x2C), + DIAG_COUNTER(sq_num_bre, 0x34), + DIAG_COUNTER(sq_num_rire, 0x44), + DIAG_COUNTER(rq_num_rire, 0x48), + DIAG_COUNTER(sq_num_rae, 0x4C), + DIAG_COUNTER(rq_num_rae, 0x50), + DIAG_COUNTER(sq_num_roe, 0x54), + DIAG_COUNTER(sq_num_tree, 0x5C), + DIAG_COUNTER(sq_num_rree, 0x64), + DIAG_COUNTER(rq_num_rnr, 0x68), + DIAG_COUNTER(sq_num_rnr, 0x6C), + DIAG_COUNTER(rq_num_oos, 0x100), + DIAG_COUNTER(sq_num_oos, 0x104), +}; + +static const struct diag_counter diag_ext[] = { + DIAG_COUNTER(rq_num_dup, 0x130), + DIAG_COUNTER(sq_num_to, 0x134), +}; + +static const struct diag_counter diag_device_only[] = { + DIAG_COUNTER(num_cqovf, 0x1A0), + DIAG_COUNTER(rq_num_udsdprd, 0x118), +}; + +static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_diag_counters *diag = dev->diag_counters; + + if (!diag[!!port_num].name) + return NULL; + + return rdma_alloc_hw_stats_struct(diag[!!port_num].name, + diag[!!port_num].num_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx4_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_diag_counters *diag = dev->diag_counters; + u32 hw_value[ARRAY_SIZE(diag_device_only) + + ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {}; + int ret; + int i; + + ret = mlx4_query_diag_counters(dev->dev, + MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS, + diag[!!port].offset, hw_value, + diag[!!port].num_counters, port); + + if (ret) + return ret; + + for (i = 0; i < diag[!!port].num_counters; i++) + stats->value[i] = hw_value[i]; + + return diag[!!port].num_counters; +} + +static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, + const char ***name, + u32 **offset, + u32 *num, + bool port) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(diag_basic); + + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) + num_counters += ARRAY_SIZE(diag_ext); + + if (!port) + num_counters += ARRAY_SIZE(diag_device_only); + + *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); + if (!*name) + return -ENOMEM; + + *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); + if (!*offset) + goto err_name; + + *num = num_counters; + + return 0; + +err_name: + kfree(*name); + return -ENOMEM; +} + +static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, + const char **name, + u32 *offset, + bool port) +{ + int i; + int j; + + for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { + name[i] = diag_basic[i].name; + offset[i] = diag_basic[i].offset; + } + + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { + for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { + name[j] = diag_ext[i].name; + offset[j] = diag_ext[i].offset; + } + } + + if (!port) { + for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { + name[j] = diag_device_only[i].name; + offset[j] = diag_device_only[i].offset; + } + } +} + +static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) +{ + struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; + int i; + int ret; + bool per_port = !!(ibdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { + /* i == 1 means we are building port counters */ + if (i && !per_port) + continue; + + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, + &diag[i].offset, + &diag[i].num_counters, i); + if (ret) + goto err_alloc; + + mlx4_ib_fill_diag_counters(ibdev, diag[i].name, + diag[i].offset, i); + } + + ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; + ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + + return 0; + +err_alloc: + if (i) { + kfree(diag[i - 1].name); + kfree(diag[i - 1].offset); + } + + return ret; +} + +static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev) +{ + int i; + + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { + kfree(ibdev->diag_counters[i].offset); + kfree(ibdev->diag_counters[i].name); + } +} + #define MLX4_IB_INVALID_MAC ((u64)-1) static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, struct net_device *dev, @@ -2280,6 +2457,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_fw_ver_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->dev->caps.fw_ver >> 32), + (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->dev->caps.fw_ver & 0xffff); +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2413,6 +2601,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; + ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; if (!mlx4_is_slave(ibdev->dev)) { @@ -2555,9 +2744,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (j = 1; j <= ibdev->dev->caps.num_ports; j++) atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); - if (ib_register_device(&ibdev->ib_dev, NULL)) + if (mlx4_ib_alloc_diag_counters(ibdev)) goto err_steer_free_bitmap; + if (ib_register_device(&ibdev->ib_dev, NULL)) + goto err_diag_counters; + if (mlx4_ib_mad_init(ibdev)) goto err_reg; @@ -2623,6 +2815,9 @@ err_mad: err_reg: ib_unregister_device(&ibdev->ib_dev); +err_diag_counters: + mlx4_ib_diag_cleanup(ibdev); + err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); @@ -2726,6 +2921,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 29acda249612..7c5832ede4bd 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -549,6 +549,14 @@ struct mlx4_ib_counters { u32 default_counter; }; +#define MLX4_DIAG_COUNTERS_TYPES 2 + +struct mlx4_ib_diag_counters { + const char **name; + u32 *offset; + u32 num_counters; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -585,6 +593,7 @@ struct mlx4_ib_dev { /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; + struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES]; }; struct ib_event_work { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9c0e67bd2ba7..308a358e5b46 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } +static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + unsigned int idx; + int np; + int i; + + wq = &qp->sq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + idx = wq->last_poll & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + wq->last_poll = wq->w_list[idx].next; + } + *npolled = np; +} + +static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + int np; + int i; + + wq = &qp->rq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + } + *npolled = np; +} + +static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + sw_send_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } + + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + goto out; + } if (unlikely(!list_empty(&cq->wc_list))) soft_polled = poll_soft_wc(cq, num_entries, wc); @@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) if (npolled) mlx5_cq_set_ci(&cq->mcq); - +out: spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) @@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->resize_buf = NULL; cq->resize_umem = NULL; cq->create_flags = attr->flags; + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); if (context) { err = create_cq_user(dev, udata, context, cq, entries, diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 53e03c8ede79..79e6309460dc 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } -static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) -{ - return ++index % gsi->cap.max_send_wr; -} - -#define for_each_outstanding_wr(gsi, index) \ - for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ - index = next_outstanding(gsi, index)) - /* Call with gsi->lock locked */ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) { @@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) struct mlx5_ib_gsi_wr *wr; u32 index; - for_each_outstanding_wr(gsi, index) { - wr = &gsi->outstanding_wrs[index]; + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; + index++) { + wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr]; if (!wr->completed) break; @@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, return -ENOMEM; } - gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; - gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi % + gsi->cap.max_send_wr]; + gsi->outstanding_pi++; if (!wc) { memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index dad63f038bb8..a84bb766fc62 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,11 +42,13 @@ #include <asm/pat.h> #endif #include <linux/sched.h> +#include <linux/delay.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> +#include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> #include <linux/in.h> @@ -457,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + struct mlx5_ib_query_device_resp resp = {}; + size_t resp_len; + u64 max_tso; - if (uhw->inlen || uhw->outlen) + resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); + if (uhw->outlen && uhw->outlen < resp_len) + return -EINVAL; + else + resp.response_length = resp_len; + + if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -511,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; - if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - (MLX5_CAP_ETH(dev->mdev, csum_cap))) + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { + if (MLX5_CAP_ETH(mdev, csum_cap)) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); + if (max_tso) { + resp.tso_caps.max_tso = 1 << max_tso; + resp.tso_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + resp.response_length += sizeof(resp.tso_caps); + } + } + } + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; props->device_cap_flags |= IB_DEVICE_UD_TSO; @@ -576,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + + if (err) + return err; + } + return 0; } @@ -983,6 +1012,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_uars; } + INIT_LIST_HEAD(&context->vma_private_list); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -992,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); + if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.response_length += sizeof(resp.cmds_supp_uhw); + } + /* * We don't want to expose information from the PCI bar that is located * after 4096 bytes, so if the arch only supports larger pages, let's @@ -1006,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset) + - sizeof(resp.reserved2) + - sizeof(resp.reserved3); + sizeof(resp.reserved2); } err = ib_copy_to_udata(udata, &resp, resp.response_length); @@ -1086,6 +1120,125 @@ static int get_index(unsigned long offset) return get_arg(offset); } +static void mlx5_ib_vma_open(struct vm_area_struct *area) +{ + /* vma_open is called when a new VMA is created on top of our VMA. This + * is done through either mremap flow or split_vma (usually due to + * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, + * as this VMA is strongly hardware related. Therefore we set the + * vm_ops of the newly created/cloned VMA to NULL, to prevent it from + * calling us again and trying to do incorrect actions. We assume that + * the original VMA size is exactly a single page, and therefore all + * "splitting" operation will not happen to it. + */ + area->vm_ops = NULL; +} + +static void mlx5_ib_vma_close(struct vm_area_struct *area) +{ + struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; + + /* It's guaranteed that all VMAs opened on a FD are closed before the + * file itself is closed, therefore no sync is needed with the regular + * closing flow. (e.g. mlx5 ib_dealloc_ucontext) + * However need a sync with accessing the vma as part of + * mlx5_ib_disassociate_ucontext. + * The close operation is usually called under mm->mmap_sem except when + * process is exiting. + * The exiting case is handled explicitly as part of + * mlx5_ib_disassociate_ucontext. + */ + mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; + + /* setting the vma context pointer to null in the mlx5_ib driver's + * private data, to protect a race condition in + * mlx5_ib_disassociate_ucontext(). + */ + mlx5_ib_vma_priv_data->vma = NULL; + list_del(&mlx5_ib_vma_priv_data->list); + kfree(mlx5_ib_vma_priv_data); +} + +static const struct vm_operations_struct mlx5_ib_vm_ops = { + .open = mlx5_ib_vma_open, + .close = mlx5_ib_vma_close +}; + +static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, + struct mlx5_ib_ucontext *ctx) +{ + struct mlx5_ib_vma_private_data *vma_prv; + struct list_head *vma_head = &ctx->vma_private_list; + + vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); + if (!vma_prv) + return -ENOMEM; + + vma_prv->vma = vma; + vma->vm_private_data = vma_prv; + vma->vm_ops = &mlx5_ib_vm_ops; + + list_add(&vma_prv->list, vma_head); + + return 0; +} + +static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + int ret; + struct vm_area_struct *vma; + struct mlx5_ib_vma_private_data *vma_private, *n; + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + /* need to protect from a race on closing the vma as part of + * mlx5_ib_vma_close. + */ + down_read(&owning_mm->mmap_sem); + list_for_each_entry_safe(vma_private, n, &context->vma_private_list, + list) { + vma = vma_private->vma; + ret = zap_vma_ptes(vma, vma->vm_start, + PAGE_SIZE); + WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); + /* context going to be destroyed, should + * not access ops any more. + */ + vma->vm_ops = NULL; + list_del(&vma_private->list); + kfree(vma_private); + } + up_read(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) { switch (cmd) { @@ -1101,8 +1254,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, - struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) + struct vm_area_struct *vma, + struct mlx5_ib_ucontext *context) { + struct mlx5_uuar_info *uuari = &context->uuari; int err; unsigned long idx; phys_addr_t pfn, pa; @@ -1152,14 +1307,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), vma->vm_start, &pa); - return 0; + return mlx5_ib_set_vma_data(vma, context); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); - struct mlx5_uuar_info *uuari = &context->uuari; unsigned long command; phys_addr_t pfn; @@ -1168,7 +1322,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - return uar_mmap(dev, command, vma, uuari); + return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; @@ -1331,6 +1485,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); break; + case IB_FLOW_SPEC_IPV6: + if (ib_spec->size != sizeof(ib_spec->ipv6)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IPV6); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.src_ip, + sizeof(ib_spec->ipv6.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.src_ip, + sizeof(ib_spec->ipv6.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.dst_ip, + sizeof(ib_spec->ipv6.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.dst_ip, + sizeof(ib_spec->ipv6.val.dst_ip)); + break; case IB_FLOW_SPEC_TCP: if (ib_spec->size != sizeof(ib_spec->tcp_udp)) return -EINVAL; @@ -1801,15 +1981,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -1828,7 +1999,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); @@ -1836,7 +2006,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, &dev_attr_fw_pages, @@ -1854,6 +2023,65 @@ static void pkey_change_handler(struct work_struct *work) mutex_unlock(&ports->devr->mutex); } +static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) +{ + struct mlx5_ib_qp *mqp; + struct mlx5_ib_cq *send_mcq, *recv_mcq; + struct mlx5_core_cq *mcq; + struct list_head cq_armed_list; + unsigned long flags_qp; + unsigned long flags_cq; + unsigned long flags; + + INIT_LIST_HEAD(&cq_armed_list); + + /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ + spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); + list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { + spin_lock_irqsave(&mqp->sq.lock, flags_qp); + if (mqp->sq.tail != mqp->sq.head) { + send_mcq = to_mcq(mqp->ibqp.send_cq); + spin_lock_irqsave(&send_mcq->lock, flags_cq); + if (send_mcq->mcq.comp && + mqp->ibqp.send_cq->comp_handler) { + if (!send_mcq->mcq.reset_notify_added) { + send_mcq->mcq.reset_notify_added = 1; + list_add_tail(&send_mcq->mcq.reset_notify, + &cq_armed_list); + } + } + spin_unlock_irqrestore(&send_mcq->lock, flags_cq); + } + spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); + spin_lock_irqsave(&mqp->rq.lock, flags_qp); + /* no handling is needed for SRQ */ + if (!mqp->ibqp.srq) { + if (mqp->rq.tail != mqp->rq.head) { + recv_mcq = to_mcq(mqp->ibqp.recv_cq); + spin_lock_irqsave(&recv_mcq->lock, flags_cq); + if (recv_mcq->mcq.comp && + mqp->ibqp.recv_cq->comp_handler) { + if (!recv_mcq->mcq.reset_notify_added) { + recv_mcq->mcq.reset_notify_added = 1; + list_add_tail(&recv_mcq->mcq.reset_notify, + &cq_armed_list); + } + } + spin_unlock_irqrestore(&recv_mcq->lock, + flags_cq); + } + } + spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); + } + /*At that point all inflight post send were put to be executed as of we + * lock/unlock above locks Now need to arm all involved CQs. + */ + list_for_each_entry(mcq, &cq_armed_list, reset_notify) { + mcq->comp(mcq); + } + spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1866,6 +2094,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_SYS_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; + mlx5_ib_handle_internal_error(ibdev); break; case MLX5_DEV_EVENT_PORT_UP: @@ -2272,6 +2501,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct mlx5_ib_dev *dev = + container_of(ibdev, struct mlx5_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), + fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); +} + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; @@ -2298,6 +2536,113 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev) unregister_netdevice_notifier(&dev->roce.nb); } +static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_ports; i++) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); +} + +static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) +{ + int i; + int ret; + + for (i = 0; i < dev->num_ports; i++) { + ret = mlx5_core_alloc_q_counter(dev->mdev, + &dev->port[i].q_cnt_id); + if (ret) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, ret); + goto dealloc_counters; + } + } + + return 0; + +dealloc_counters: + while (--i >= 0) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); + + return ret; +} + +static const char * const names[] = { + "rx_write_requests", + "rx_read_requests", + "rx_atomic_requests", + "out_of_buffer", + "out_of_sequence", + "duplicate_request", + "rnr_nak_retry_err", + "packet_seq_err", + "implied_nak_seq_err", + "local_ack_timeout_err", +}; + +static const size_t stats_offsets[] = { + MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), + MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), + MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), + MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), + MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), + MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), +}; + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); + + /* We support only per port stats */ + if (port_num == 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + __be32 val; + int ret; + int i; + + if (!port || !stats) + return -ENOSYS; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_q_counter(dev->mdev, + dev->port[port - 1].q_cnt_id, 0, + out, outlen); + if (ret) + goto free; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + val = *(__be32 *)(out + stats_offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } +free: + kvfree(out); + return ARRAY_SIZE(names); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -2320,10 +2665,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; + dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) + goto err_dealloc; + rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) - goto err_dealloc; + goto err_free_port; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); @@ -2418,6 +2768,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; @@ -2425,6 +2776,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; } + dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + mlx5_ib_internal_fill_odp_caps(dev); if (MLX5_CAP_GEN(mdev, imaicl)) { @@ -2435,6 +2788,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && + MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; + dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; @@ -2447,9 +2806,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) IB_LINK_LAYER_ETHERNET) { dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.create_wq = mlx5_ib_create_wq; + dev->ib_dev.modify_wq = mlx5_ib_modify_wq; + dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; + dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; + dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } err = init_node_data(dev); if (err) @@ -2457,6 +2826,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); + INIT_LIST_HEAD(&dev->qp_list); + spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); @@ -2472,10 +2843,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_rsrc; - err = ib_register_device(&dev->ib_dev, NULL); + err = mlx5_ib_alloc_q_counters(dev); if (err) goto err_odp; + err = ib_register_device(&dev->ib_dev, NULL); + if (err) + goto err_q_cnt; + err = create_umr_res(dev); if (err) goto err_dev; @@ -2497,6 +2872,9 @@ err_umrc: err_dev: ib_unregister_device(&dev->ib_dev); +err_q_cnt: + mlx5_ib_dealloc_q_counters(dev); + err_odp: mlx5_ib_odp_remove_one(dev); @@ -2507,6 +2885,9 @@ err_disable_roce: if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); +err_free_port: + kfree(dev->port); + err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -2519,11 +2900,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); ib_unregister_device(&dev->ib_dev); + mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); + kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c4a9825828bc..372385d0f993 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,11 @@ enum { MLX5_CQE_VERSION_V1, }; +struct mlx5_ib_vma_private_data { + struct list_head list; + struct vm_area_struct *vma; +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -116,6 +121,7 @@ struct mlx5_ib_ucontext { u8 cqe_version; /* Transport Domain number */ u32 tdn; + struct list_head vma_private_list; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -217,12 +223,41 @@ struct mlx5_ib_wq { void *qend; }; +struct mlx5_ib_rwq { + struct ib_wq ibwq; + u32 rqn; + u32 rq_num_pas; + u32 log_rq_stride; + u32 log_rq_size; + u32 rq_page_offset; + u32 log_page_size; + struct ib_umem *umem; + size_t buf_size; + unsigned int page_shift; + int create_type; + struct mlx5_db db; + u32 user_index; + u32 wqe_count; + u32 wqe_shift; + int wq_sig; +}; + enum { MLX5_QP_USER, MLX5_QP_KERNEL, MLX5_QP_EMPTY }; +enum { + MLX5_WQ_USER, + MLX5_WQ_KERNEL +}; + +struct mlx5_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; + u32 rqtn; +}; + /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. @@ -266,6 +301,10 @@ struct mlx5_ib_qp_trans { u8 resp_depth; }; +struct mlx5_ib_rss_qp { + u32 tirn; +}; + struct mlx5_ib_rq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *rq; @@ -294,6 +333,7 @@ struct mlx5_ib_qp { union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; + struct mlx5_ib_rss_qp rss_qp; }; struct mlx5_buf buf; @@ -340,6 +380,9 @@ struct mlx5_ib_qp { spinlock_t disable_page_faults_lock; struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; #endif + struct list_head qps_list; + struct list_head cq_recv_list; + struct list_head cq_send_list; }; struct mlx5_ib_cq_buf { @@ -401,6 +444,8 @@ struct mlx5_ib_cq { struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; + struct list_head list_send_qp; + struct list_head list_recv_qp; u32 create_flags; struct list_head wc_list; enum ib_cq_notify_flags notify_flags; @@ -546,6 +591,10 @@ struct mlx5_ib_resources { struct mutex mutex; }; +struct mlx5_ib_port { + u16 q_cnt_id; +}; + struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -581,6 +630,11 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; #endif struct mlx5_ib_flow_db flow_db; + /* protect resources needed as part of reset flow */ + spinlock_t reset_flow_resource_lock; + struct list_head qp_list; + /* Array with num_ports elements */ + struct mlx5_ib_port *port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -628,6 +682,16 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mlx5_ib_qp, ibqp); } +static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) +{ + return container_of(ibwq, struct mlx5_ib_rwq, ibwq); +} + +static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl); +} + static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); @@ -762,6 +826,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf2ce50511f..4b021305c321 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1193,12 +1193,16 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { + struct mlx5_core_dev *mdev = dev->mdev; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce0a7ab35a22..0dd7d93cac95 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); + static int is_qp0(enum ib_qp_type qp_type) { return qp_type == IB_QPT_SMI; @@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type) } } +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); + static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) { return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; @@ -649,6 +658,71 @@ err_umem: return err; } +static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &rwq->db); + if (rwq->umem) + ib_umem_release(rwq->umem); +} + +static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq, + struct mlx5_ib_create_wq *ucmd) +{ + struct mlx5_ib_ucontext *context; + int page_shift = 0; + int npages; + u32 offset = 0; + int ncont = 0; + int err; + + if (!ucmd->buf_addr) + return -EINVAL; + + context = to_mucontext(pd->uobject->context); + rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, + rwq->buf_size, 0, 0); + if (IS_ERR(rwq->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(rwq->umem); + return err; + } + + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, + &rwq->rq_page_offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + rwq->rq_num_pas = ncont; + rwq->page_shift = page_shift; + rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); + + mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + npages, page_shift, ncont, offset); + + err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_umem; + } + + rwq->create_type = MLX5_WQ_USER; + return 0; + +err_umem: + ib_umem_release(rwq->umem); + return err; +} + static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, @@ -1201,6 +1275,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, rq->doorbell = &qp->db; } +static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); +} + +static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_uobject *uobj = pd->uobject; + struct ib_ucontext *ucontext = uobj->context; + struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + struct mlx5_ib_create_qp_resp resp = {}; + int inlen; + int err; + u32 *in; + void *tirc; + void *hfso; + u32 selected_fields = 0; + size_t min_resp_len; + u32 tdn = mucontext->tdn; + struct mlx5_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + return -EOPNOTSUPP; + + if (init_attr->create_flags || init_attr->send_cq) + return -EINVAL; + + min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index); + if (udata->outlen < min_resp_len) + return -EINVAL; + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = ib_copy_to_udata(udata, &resp, min_resp_len); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EINVAL; + } + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, + init_attr->rwq_ind_tbl->ind_tbl_num); + MLX5_SET(tirc, tirc, transport_domain, tdn); + + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { + case MLX5_RX_HASH_FUNC_TOEPLITZ: + { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + + if (len != ucmd.rx_key_len) { + err = -EINVAL; + goto err; + } + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, ucmd.rx_hash_key, len); + break; + } + default: + err = -EOPNOTSUPP; + goto err; + } + + if (!ucmd.rx_hash_fields_mask) { + /* special case when this TIR serves as steering entry without hashing */ + if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) + goto create_tir; + err = -EINVAL; + goto err; + } + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { + err = -EINVAL; + goto err; + } + + /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + err = -EINVAL; + goto err; + } + + /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); + +create_tir: + err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + + if (err) + goto err; + + kvfree(in); + /* qpn is reserved for that QP */ + qp->trans_qp.base.mqp.qpn = 0; + return 0; + +err: + kvfree(in); + return err; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) @@ -1211,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; int inlen = sizeof(*in); int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -1227,6 +1485,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + if (init_attr->rwq_ind_tbl) { + if (!udata) + return -ENOSYS; + + err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); + return err; + } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); @@ -1460,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; + get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, + &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* Maintain device to QPs access, needed for further handling via reset + * flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling via reset flow + */ + if (send_cq) + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + if (recv_cq) + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + return 0; err_create: @@ -1478,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv if (send_cq) { if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } else { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } } else if (recv_cq) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else { __acquire(&send_cq->lock); @@ -1509,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } else { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } } else if (recv_cq) { __release(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } else { __release(&recv_cq->lock); __release(&send_cq->lock); @@ -1535,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx5_ib_qp *qp, +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) { - switch (qp->ibqp.qp_type) { + switch (qp_type) { case IB_QPT_XRC_TGT: *send_cq = NULL; *recv_cq = NULL; break; case MLX5_IB_QPT_REG_UMR: case IB_QPT_XRC_INI: - *send_cq = to_mcq(qp->ibqp.send_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = NULL; break; @@ -1557,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp, case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; + *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL; break; case IB_QPT_MAX: @@ -1577,8 +1861,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_modify_qp_mbox_in *in; + unsigned long flags; int err; + if (qp->ibqp.rwq_ind_tbl) { + destroy_rss_raw_qp_tir(dev, qp); + return; + } + base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; @@ -1602,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* del from lists under both locks above to protect reset flow paths */ + list_del(&qp->qps_list); + if (send_cq) + list_del(&qp->cq_send_list); + + if (recv_cq) + list_del(&qp->cq_recv_list); if (qp->create_type == MLX5_QP_KERNEL) { - mlx5_ib_lock_cqs(send_cq, recv_cq); __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); - mlx5_ib_unlock_cqs(send_cq, recv_cq); } + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { destroy_raw_packet_qp(dev, qp); @@ -2300,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; @@ -2349,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : + qp->port) - 1; + struct mlx5_ib_port *mibport = &dev->port[port_num]; + + context->qp_counter_set_usr_page |= + cpu_to_be32((u32)(mibport->q_cnt_id) << 24); + } + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); @@ -2439,6 +2750,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3397,6 +3711,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -3424,6 +3739,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); @@ -3725,6 +4047,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; struct mlx5_rwqe_sig *sig; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; @@ -3736,6 +4060,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -4055,6 +4386,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); @@ -4164,3 +4498,322 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) return 0; } + +static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, + struct ib_wq_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev; + __be64 *rq_pas0; + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + dev = to_mdev(pd->device); + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + MLX5_SET(rqc, rqc, mem_rq_type, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); + MLX5_SET(rqc, rqc, user_index, rwq->user_index); + MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); + MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); + MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); + MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); + MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); + MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); + rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); + mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + kvfree(in); + return err; +} + +static int set_user_rq_size(struct mlx5_ib_dev *dev, + struct ib_wq_init_attr *wq_init_attr, + struct mlx5_ib_create_wq *ucmd, + struct mlx5_ib_rwq *rwq) +{ + /* Sanity check RQ size before proceeding */ + if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz))) + return -EINVAL; + + if (!ucmd->rq_wqe_count) + return -EINVAL; + + rwq->wqe_count = ucmd->rq_wqe_count; + rwq->wqe_shift = ucmd->rq_wqe_shift; + rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + rwq->log_rq_stride = rwq->wqe_shift; + rwq->log_rq_size = ilog2(rwq->wqe_count); + return 0; +} + +static int prepare_user_rq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata, + struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_wq ucmd = {}; + int err; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (ucmd.reserved) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = set_user_rq_size(dev, init_attr, &ucmd, rwq); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + err = create_user_rq(dev, pd, rwq, &ucmd); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + if (err) + return err; + } + + rwq->user_index = ucmd.user_index; + return 0; +} + +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_rwq *rwq; + struct mlx5_ib_create_wq_resp resp = {}; + size_t min_resp_len; + int err; + + if (!udata) + return ERR_PTR(-ENOSYS); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + dev = to_mdev(pd->device); + switch (init_attr->wq_type) { + case IB_WQT_RQ: + rwq = kzalloc(sizeof(*rwq), GFP_KERNEL); + if (!rwq) + return ERR_PTR(-ENOMEM); + err = prepare_user_rq(pd, init_attr, udata, rwq); + if (err) + goto err; + err = create_rq(rwq, pd, init_attr); + if (err) + goto err_user_rq; + break; + default: + mlx5_ib_dbg(dev, "unsupported wq type %d\n", + init_attr->wq_type); + return ERR_PTR(-EINVAL); + } + + rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.state = IB_WQS_RESET; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq->ibwq; + +err_copy: + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); +err_user_rq: + destroy_user_rq(pd, rwq); +err: + kfree(rwq); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_wq(struct ib_wq *wq) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + destroy_user_rq(wq->pd, rwq); + kfree(rwq); + + return 0; +} + +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + int sz = 1 << init_attr->log_ind_tbl_size; + struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; + size_t min_resp_len; + int inlen; + int err; + int i; + u32 *in; + void *rqtc; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); + if (!rwq_ind_tbl) + return ERR_PTR(-ENOMEM); + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err; + } + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); + + err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); + kvfree(in); + + if (err) + goto err; + + rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq_ind_tbl->ib_rwq_ind_tbl; + +err_copy: + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); +err: + kfree(rwq_ind_tbl); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); + + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + + kfree(rwq_ind_tbl); + return 0; +} + +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + struct mlx5_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + int curr_wq_state; + int wq_state; + int inlen; + int err; + void *rqc; + void *in; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ? + wq_attr->curr_wq_state : wq->state; + wq_state = (wq_attr_mask & IB_WQ_STATE) ? + wq_attr->wq_state : curr_wq_state; + if (curr_wq_state == IB_WQS_ERR) + curr_wq_state = MLX5_RQC_STATE_ERR; + if (wq_state == IB_WQS_ERR) + wq_state = MLX5_RQC_STATE_ERR; + MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); + MLX5_SET(rqc, rqc, state, wq_state); + + err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + kvfree(in); + if (!err) + rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3b2ddd64a371..ed6ac52355f1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) } static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen, - int is_xrc) + struct mlx5_srq_attr *in, + struct ib_udata *udata, int buf_size) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; size_t ucmdlen; - void *xsrqc; int err; int npages; int page_shift; @@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata->inlen - sizeof(ucmd))) return -EINVAL; - if (is_xrc) { + if (in->type == IB_SRQT_XRC) { err = get_srq_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) @@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_umem; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; - *in = mlx5_vzalloc(*inlen); - if (!(*in)) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont); + if (!in->pas) { err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &srq->db); @@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->page_offset = offset; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = uidx; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_umem: ib_umem_release(srq->umem); @@ -168,15 +161,13 @@ err_umem: } static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen, int is_xrc) + struct mlx5_srq_attr *in, int buf_size) { int err; int i; struct mlx5_wqe_srq_next_seg *next; int page_shift; int npages; - void *xsrqc; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", buf_size, page_shift, srq->buf.npages, npages); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; - *in = mlx5_vzalloc(*inlen); - if (!*in) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages); + if (!in->pas) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, (*in)->pas); + mlx5_fill_page_array(&srq->buf, in->pas); srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { @@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - /* 0xffffff means we ask to work with cqe version 0 */ - MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_buf: mlx5_buf_free(dev->mdev, &srq->buf); @@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int desc_size; int buf_size; int err; - struct mlx5_create_srq_mbox_in *uninitialized_var(in); - int uninitialized_var(inlen); - int is_xrc; - u32 flgs, xrcdn; + struct mlx5_srq_attr in = {0}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); - if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, - is_xrc); + err = create_srq_user(pd, srq, &in, udata, buf_size); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, - is_xrc); + err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in->ctx.state_log_sz = ilog2(srq->msrq.max); - flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; - xrcdn = 0; - if (is_xrc) { - xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + in.type = init_attr->srq_type; + in.log_size = ilog2(srq->msrq.max); + in.wqe_shift = srq->msrq.wqe_shift - 4; + if (srq->wq_sig) + in.flags |= MLX5_SRQ_FLAG_WQ_SIG; + if (init_attr->srq_type == IB_SRQT_XRC) { + in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; } else if (init_attr->srq_type == IB_SRQT_BASIC) { - xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; } - in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); - - in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); - in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); - kvfree(in); + in.pd = to_mpd(pd)->pdn; + in.db_record = srq->db.dma; + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); goto err_usr_kern_srq; @@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); struct mlx5_ib_srq *srq = to_msrq(ibsrq); int ret; - struct mlx5_query_srq_mbox_out *out; + struct mlx5_srq_attr *out; out = kzalloc(sizeof(*out), GFP_KERNEL); if (!out) @@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (ret) goto out_box; - srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->srq_limit = out->lwm; srq_attr->max_wr = srq->msrq.max - 1; srq_attr->max_sge = srq->msrq.max_gs; @@ -458,6 +436,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; struct mlx5_wqe_data_seg *scat; + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; @@ -465,6 +445,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, spin_lock_irqsave(&srq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->num_sge > srq->msrq.max_gs)) { err = -EINVAL; @@ -507,7 +493,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, *srq->db.db = cpu_to_be32(srq->wqe_ctr); } - +out: spin_unlock_irqrestore(&srq->lock, flags); return err; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 61bc308bb802..188dac4301b5 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -46,6 +46,10 @@ enum { MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, }; +enum { + MLX5_WQ_FLAG_SIGNATURE = 1 << 0, +}; + /* Increment this value if any changes that break userspace ABI * compatibility are made. @@ -79,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; +enum mlx5_user_cmds_supp_uhw { + MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; @@ -94,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 comp_mask; __u32 response_length; __u8 cqe_version; - __u8 reserved2; - __u16 reserved3; + __u8 cmds_supp_uhw; + __u16 reserved2; __u64 hca_core_clock_offset; }; @@ -103,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp { __u32 pdn; }; +struct mlx5_ib_tso_caps { + __u32 max_tso; /* Maximum tso payload size in bytes */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; +}; + +struct mlx5_ib_query_device_resp { + __u32 comp_mask; + __u32 response_length; + struct mlx5_ib_tso_caps tso_caps; +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; @@ -148,6 +172,40 @@ struct mlx5_ib_create_qp { __u64 sq_buf_addr; }; +/* RX Hash function flags */ +enum mlx5_rx_hash_function_flags { + MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP + * and *TCP and *UDP flags can't be enabled together on the same QP. +*/ +enum mlx5_rx_hash_fields { + MLX5_RX_HASH_SRC_IPV4 = 1 << 0, + MLX5_RX_HASH_DST_IPV4 = 1 << 1, + MLX5_RX_HASH_SRC_IPV6 = 1 << 2, + MLX5_RX_HASH_DST_IPV6 = 1 << 3, + MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + +struct mlx5_ib_create_qp_rss { + __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ + __u8 rx_key_len; /* valid only for Toeplitz */ + __u8 reserved[6]; + __u8 rx_hash_key[128]; /* valid only for Toeplitz */ + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; @@ -159,6 +217,32 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +struct mlx5_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 user_index; + __u32 flags; + __u32 comp_mask; + __u32 reserved; +}; + +struct mlx5_ib_create_wq_resp { + __u32 response_length; + __u32 reserved; +}; + +struct mlx5_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; + +struct mlx5_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9866c35cc977..da2335f7f7c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return sprintf(buf, "%x\n", dev->rev_id); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), - (int) (dev->fw_ver >> 16) & 0xffff, - (int) dev->fw_ver & 0xffff); -} - static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { @@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mthca_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->fw_ver >> 32), + (int) (dev->fw_ver >> 16) & 0xffff, + (int) dev->fw_ver & 0xffff); +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; dev->ib_dev.get_port_immutable = mthca_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 74c6a9426047..6727af27c017 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -98,7 +98,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "PCI header, aborting.\n"); - goto out; + goto put_dev; } for (i = 0; i < 64; ++i) { @@ -108,7 +108,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't save HCA " "PCI header, aborting.\n"); - goto out; + goto free_hca; } } @@ -121,7 +121,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "bridge PCI header, aborting.\n"); - goto out; + goto free_hca; } for (i = 0; i < 64; ++i) { @@ -131,7 +131,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't save HCA bridge " "PCI header, aborting.\n"); - goto out; + goto free_bh; } } bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); @@ -139,7 +139,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't locate HCA bridge " "PCI-X capability, aborting.\n"); - goto out; + goto free_bh; } } @@ -152,7 +152,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't map HCA reset register, " "aborting.\n"); - goto out; + goto free_bh; } writel(MTHCA_RESET_VALUE, reset); @@ -172,7 +172,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't access HCA after reset, " "aborting.\n"); - goto out; + goto free_bh; } if (v != 0xffffffff) @@ -184,7 +184,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "PCI device did not come back after reset, " "aborting.\n"); - goto out; + goto free_bh; } good: @@ -195,14 +195,14 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Upstream " "split transaction control, aborting.\n"); - goto out; + goto free_bh; } if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, bridge_header[(bridge_pcix_cap + 0xc) / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Downstream " "split transaction control, aborting.\n"); - goto out; + goto free_bh; } /* * Bridge control register is at 0x3e, so we'll @@ -216,7 +216,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " "aborting.\n", i); - goto out; + goto free_bh; } } @@ -225,7 +225,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " "aborting.\n"); - goto out; + goto free_bh; } } @@ -235,7 +235,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI-X " "command register, aborting.\n"); - goto out; + goto free_bh; } } @@ -246,7 +246,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); - goto out; + goto free_bh; } linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, @@ -254,7 +254,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); - goto out; + goto free_bh; } } @@ -266,7 +266,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA reg %x, " "aborting.\n", i); - goto out; + goto free_bh; } } @@ -275,14 +275,12 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA COMMAND, " "aborting.\n"); - goto out; } - -out: - if (bridge) - pci_dev_put(bridge); +free_bh: kfree(bridge_header); +free_hca: kfree(hca_header); - +put_dev: + pci_dev_put(bridge); return err; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 464d6da5fe91..bd69125731c1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2606,23 +2606,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, /** - * show_fw_ver - */ -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev.dev); - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%u.%u\n", - (nesvnic->nesdev->nesadapter->firmware_version >> 16), - (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); -} - - -/** * show_hca */ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, @@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *nes_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct nes_ib_device *nesibdev = + container_of(dev, struct nes_ib_device, ibdev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + snprintf(str, str_len, "%u.%u", + (nesvnic->nesdev->nesadapter->firmware_version >> 16), + (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); +} + /** * nes_init_ofa_device */ @@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; nesibdev->ibdev.get_port_immutable = nes_port_immutable; + nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 3d75f65ce87e..07d0c6c5b046 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct ocrdma_dev *dev = get_ocrdma_dev(device); + + snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]); +} + static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); @@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; dev->ibdev.get_port_immutable = ocrdma_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ocrdma_dev *dev = dev_get_drvdata(device); - - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); -} - static ssize_t show_hca_type(struct device *device, struct device_attribute *attr, char *buf) { @@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type }; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 575b737d9ef3..9cc0aae1d781 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -106,6 +106,49 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; +const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { +[IB_WR_RDMA_WRITE] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_RDMA_READ] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC, +}, + +[IB_WR_ATOMIC_CMP_AND_SWP] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_ATOMIC_FETCH_AND_ADD] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_RDMA_WRITE_WITH_IMM] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND_WITH_IMM] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +}; + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) { diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 846e6c726df7..10d062561bd9 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -169,8 +169,12 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (ah_attr->ah_flags & IB_AH_GRH) { - qib_copy_sge(&qp->r_sge, &ah_attr->grh, - sizeof(struct ib_grh), 1); + struct ib_grh grh; + struct ib_global_route grd = ah_attr->grh; + + qib_make_grh(ibp, &grh, &grd, 0, 0); + qib_copy_sge(&qp->r_sge, &grh, + sizeof(grh), 1); wc.wc_flags |= IB_WC_GRH; } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index cbf6200e6afc..fd1dfbce5539 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1582,6 +1582,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_total_mcast_qp_attach = rdi->dparms.props.max_mcast_qp_attach * rdi->dparms.props.max_mcast_grp; + /* post send table */ + dd->verbs_dev.rdi.post_parms = qib_post_parms; } /** diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 4f878151f81f..736ced684842 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -497,4 +497,6 @@ extern unsigned int ib_qib_max_srq_wrs; extern const u32 ib_qib_rnr_table[]; +extern const struct rvt_operation_params qib_post_parms[]; + #endif /* QIB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 565c881a44ba..c229b9f4a52d 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void usnic_get_dev_fw_str(struct ib_device *device, + char *str, + size_t str_len) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev); + struct ethtool_drvinfo info; + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + mutex_unlock(&us_ibdev->usdev_lock); + + snprintf(str, str_len, "%s", info.fw_version); +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; + us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 3412ea06116e..80ef3f8998c8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -45,21 +45,6 @@ #include "usnic_ib_verbs.h" #include "usnic_log.h" -static ssize_t usnic_ib_show_fw_ver(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct usnic_ib_dev *us_ibdev = - container_of(device, struct usnic_ib_dev, ib_dev.dev); - struct ethtool_drvinfo info; - - mutex_lock(&us_ibdev->usdev_lock); - us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); - mutex_unlock(&us_ibdev->usdev_lock); - - return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); -} - static ssize_t usnic_ib_show_board(struct device *device, struct device_attribute *attr, char *buf) @@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } -static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); @@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); static struct device_attribute *usnic_class_attributes[] = { - &dev_attr_fw_ver, &dev_attr_board_id, &dev_attr_config, &dev_attr_iface, diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile index 988b6a0101a4..8b095b27db87 100644 --- a/drivers/infiniband/sw/Makefile +++ b/drivers/infiniband/sw/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ +obj-$(CONFIG_RDMA_RXE) += rxe/ diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index 11aa6a34bd71..1da8d01a6855 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -1,6 +1,5 @@ config INFINIBAND_RDMAVT tristate "RDMA verbs transport library" depends on 64BIT - default m ---help--- This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6ca6fa80dd6e..f2f229efbe64 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -510,6 +510,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); if (!rdi->worker) return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0f4d4500f45e..80c4b6b401b8 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -140,6 +140,7 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, init_completion(&mr->comp); /* count returning the ptr to user */ atomic_set(&mr->refcount, 1); + atomic_set(&mr->lkey_invalid, 0); mr->pd = pd; mr->max_segs = count; return 0; @@ -480,6 +481,123 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, } /** + * rvt_set_page - page assignment function called by ib_sg_to_pages + * @ibmr: memory region + * @addr: dma address of mapped page + * + * Return: 0 on success + */ +static int rvt_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rvt_mr *mr = to_imr(ibmr); + u32 ps = 1 << mr->mr.page_shift; + u32 mapped_segs = mr->mr.length >> mr->mr.page_shift; + int m, n; + + if (unlikely(mapped_segs == mr->mr.max_segs)) + return -ENOMEM; + + if (mr->mr.length == 0) { + mr->mr.user_base = addr; + mr->mr.iova = addr; + } + + m = mapped_segs / RVT_SEGSZ; + n = mapped_segs % RVT_SEGSZ; + mr->mr.map[m]->segs[n].vaddr = (void *)addr; + mr->mr.map[m]->segs[n].length = ps; + mr->mr.length += ps; + + return 0; +} + +/** + * rvt_map_mr_sg - map sg list and set it the memory region + * @ibmr: memory region + * @sg: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg + * + * Return: number of sg elements mapped to the memory region + */ +int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + struct rvt_mr *mr = to_imr(ibmr); + + mr->mr.length = 0; + mr->mr.page_shift = PAGE_SHIFT; + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, + rvt_set_page); +} + +/** + * rvt_fast_reg_mr - fast register physical MR + * @qp: the queue pair where the work request comes from + * @ibmr: the memory region to be registered + * @key: updated key for this memory region + * @access: access flags for this memory region + * + * Returns 0 on success. + */ +int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, + int access) +{ + struct rvt_mr *mr = to_imr(ibmr); + + if (qp->ibqp.pd != mr->mr.pd) + return -EACCES; + + /* not applicable to dma MR or user MR */ + if (!mr->mr.lkey || mr->umem) + return -EINVAL; + + if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00)) + return -EINVAL; + + ibmr->lkey = key; + ibmr->rkey = key; + mr->mr.lkey = key; + mr->mr.access_flags = access; + atomic_set(&mr->mr.lkey_invalid, 0); + + return 0; +} +EXPORT_SYMBOL(rvt_fast_reg_mr); + +/** + * rvt_invalidate_rkey - invalidate an MR rkey + * @qp: queue pair associated with the invalidate op + * @rkey: rkey to invalidate + * + * Returns 0 on success. + */ +int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey) +{ + struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + struct rvt_mregion *mr; + + if (rkey == 0) + return -EINVAL; + + rcu_read_lock(); + mr = rcu_dereference( + rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + atomic_set(&mr->lkey_invalid, 1); + rcu_read_unlock(); + return 0; + +bail: + rcu_read_unlock(); + return -EINVAL; +} +EXPORT_SYMBOL(rvt_invalidate_rkey); + +/** * rvt_alloc_fmr - allocate a fast memory region * @pd: the protection domain for this memory region * @mr_access_flags: access flags for this memory region @@ -682,7 +800,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, } mr = rcu_dereference( rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); - if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || + mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; @@ -782,7 +901,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference( rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); - if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || + mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 69380512c6d1..132800ee0205 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -82,6 +82,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr); struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 41ba7e9cadaa..bdb540f25a88 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -435,8 +435,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -584,6 +583,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->r_rq.wq->tail = 0; } qp->r_sge.num_sge = 0; + atomic_set(&qp->s_reserved_used, 0); } /** @@ -613,6 +613,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; gfp_t gfp; + size_t sqsize; if (!rdi) return ERR_PTR(-EINVAL); @@ -643,7 +644,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, init_attr->cap.max_recv_wr == 0) return ERR_PTR(-EINVAL); } - + sqsize = + init_attr->cap.max_send_wr + 1 + + rdi->dparms.reserved_operations; switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -658,11 +661,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sizeof(struct rvt_swqe); if (gfp == GFP_NOIO) swq = __vmalloc( - (init_attr->cap.max_send_wr + 1) * sz, + sqsize * sz, gfp | __GFP_ZERO, PAGE_KERNEL); else swq = vzalloc_node( - (init_attr->cap.max_send_wr + 1) * sz, + sqsize * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -741,13 +744,14 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); + atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_size = sqsize; qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) @@ -1332,7 +1336,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask; attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_send_wr = qp->s_size - 1 - + rdi->dparms.reserved_operations; attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; @@ -1440,25 +1445,116 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } /** - * qp_get_savail - return number of avail send entries + * rvt_qp_valid_operation - validate post send wr request + * @qp - the qp + * @post-parms - the post send table for the driver + * @wr - the work request + * + * The routine validates the operation based on the + * validation table an returns the length of the operation + * which can extend beyond the ib_send_bw. Operation + * dependent flags key atomic operation validation. * + * There is an exception for UD qps that validates the pd and + * overrides the length to include the additional UD specific + * length. + * + * Returns a negative error or the length of the work request + * for building the swqe. + */ +static inline int rvt_qp_valid_operation( + struct rvt_qp *qp, + const struct rvt_operation_params *post_parms, + struct ib_send_wr *wr) +{ + int len; + + if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length) + return -EINVAL; + if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type))) + return -EINVAL; + if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) && + ibpd_to_rvtpd(qp->ibqp.pd)->user) + return -EINVAL; + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) + return -EINVAL; + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC && + !qp->s_max_rd_atomic) + return -EINVAL; + len = post_parms[wr->opcode].length; + /* UD specific */ + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + len = sizeof(struct ib_ud_wr); + } + return len; +} + +/** + * rvt_qp_is_avail - determine queue capacity * @qp - the qp + * @rdi - the rdmavt device + * @reserved_op - is reserved operation * * This assumes the s_hlock is held but the s_last * qp variable is uncontrolled. + * + * For non reserved operations, the qp->s_avail + * may be changed. + * + * The return value is zero or a -ENOMEM. */ -static inline u32 qp_get_savail(struct rvt_qp *qp) +static inline int rvt_qp_is_avail( + struct rvt_qp *qp, + struct rvt_dev_info *rdi, + bool reserved_op) { u32 slast; - u32 ret; - + u32 avail; + u32 reserved_used; + + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); + if (unlikely(reserved_op)) { + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + if (reserved_used >= rdi->dparms.reserved_operations) + return -ENOMEM; + return 0; + } + /* non-reserved operations */ + if (likely(qp->s_avail)) + return 0; smp_read_barrier_depends(); /* see rc.c */ slast = ACCESS_ONCE(qp->s_last); if (qp->s_head >= slast) - ret = qp->s_size - (qp->s_head - slast); + avail = qp->s_size - (qp->s_head - slast); else - ret = slast - qp->s_head; - return ret - 1; + avail = slast - qp->s_head; + + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); + avail = avail - 1 - + (rdi->dparms.reserved_operations - reserved_used); + /* insure we don't assign a negative s_avail */ + if ((s32)avail <= 0) + return -ENOMEM; + qp->s_avail = avail; + if (WARN_ON(qp->s_avail > + (qp->s_size - 1 - rdi->dparms.reserved_operations))) + rvt_pr_err(rdi, + "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", + qp->ibqp.qp_num, qp->s_size, qp->s_avail, + qp->s_head, qp->s_tail, qp->s_cur, + qp->s_acked, qp->s_last); + return 0; } /** @@ -1480,49 +1576,64 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; int ret; + size_t cplen; + bool reserved_op; + int local_ops_delayed = 0; + + BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE)); /* IB spec says that num_sge == 0 is OK. */ if (unlikely(wr->num_sge > qp->s_max_sge)) return -EINVAL; + ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr); + if (ret < 0) + return ret; + cplen = ret; + /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. + * Local operations include fast register and local invalidate. + * Fast register needs to be processed immediately because the + * registered lkey may be used by following work requests and the + * lkey needs to be valid at the time those requests are posted. + * Local invalidate can be processed immediately if fencing is + * not required and no previous local invalidate ops are pending. + * Signaled local operations that have been processed immediately + * need to have requests with "completion only" flags set posted + * to the send queue in order to generate completions. */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) - return -EINVAL; - } else if (qp->ibqp.qp_type != IB_QPT_RC) { - /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - return -EINVAL; - /* Check UD destination address PD */ - if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) { + switch (wr->opcode) { + case IB_WR_REG_MR: + ret = rvt_fast_reg_mr(qp, + reg_wr(wr)->mr, + reg_wr(wr)->key, + reg_wr(wr)->access); + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) + return ret; + break; + case IB_WR_LOCAL_INV: + if ((wr->send_flags & IB_SEND_FENCE) || + atomic_read(&qp->local_ops_pending)) { + local_ops_delayed = 1; + } else { + ret = rvt_invalidate_rkey( + qp, wr->ex.invalidate_rkey); + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) + return ret; + } + break; + default: return -EINVAL; - } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { - return -EINVAL; - } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) { - return -EINVAL; - } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { - return -EINVAL; + } } + + reserved_op = rdi->post_parms[wr->opcode].flags & + RVT_OPERATION_USE_RESERVE; /* check for avail */ - if (unlikely(!qp->s_avail)) { - qp->s_avail = qp_get_savail(qp); - if (WARN_ON(qp->s_avail > (qp->s_size - 1))) - rvt_pr_err(rdi, - "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", - qp->ibqp.qp_num, qp->s_size, qp->s_avail, - qp->s_head, qp->s_tail, qp->s_cur, - qp->s_acked, qp->s_last); - if (!qp->s_avail) - return -ENOMEM; - } + ret = rvt_qp_is_avail(qp, rdi, reserved_op); + if (ret) + return ret; next = qp->s_head + 1; if (next >= qp->s_size) next = 0; @@ -1531,18 +1642,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = rvt_get_swqe_ptr(qp, qp->s_head); - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) - memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE || - wr->opcode == IB_WR_RDMA_READ) - memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); - else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); - else - memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + /* cplen has length from above */ + memcpy(&wqe->wr, wr, cplen); wqe->length = 0; j = 0; @@ -1585,14 +1686,29 @@ static int rvt_post_one_wr(struct rvt_qp *qp, atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } - wqe->ssn = qp->s_ssn++; - wqe->psn = qp->s_next_psn; - wqe->lpsn = wqe->psn + - (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); - qp->s_next_psn = wqe->lpsn + 1; + if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { + if (local_ops_delayed) + atomic_inc(&qp->local_ops_pending); + else + wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY; + wqe->ssn = 0; + wqe->psn = 0; + wqe->lpsn = 0; + } else { + wqe->ssn = qp->s_ssn++; + wqe->psn = qp->s_next_psn; + wqe->lpsn = wqe->psn + + (wqe->length ? + ((wqe->length - 1) >> log_pmtu) : + 0); + qp->s_next_psn = wqe->lpsn + 1; + } trace_rvt_post_one_wr(qp, wqe); + if (unlikely(reserved_op)) + rvt_qp_wqe_reserve(qp, wqe); + else + qp->s_avail--; smp_wmb(); /* see request builders */ - qp->s_avail--; qp->s_head = next; return 0; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 30c4fda7a05a..d430c2f7cec4 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -370,6 +370,7 @@ enum { REG_USER_MR, DEREG_MR, ALLOC_MR, + MAP_MR_SG, ALLOC_FMR, MAP_PHYS_FMR, UNMAP_FMR, @@ -528,7 +529,8 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) post_send), rvt_post_send)) if (!rdi->driver_f.schedule_send || - !rdi->driver_f.do_send) + !rdi->driver_f.do_send || + !rdi->post_parms) return -EINVAL; break; @@ -633,6 +635,12 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) rvt_alloc_mr); break; + case MAP_MR_SG: + check_driver_override(rdi, offsetof(struct ib_device, + map_mr_sg), + rvt_map_mr_sg); + break; + case MAP_PHYS_FMR: check_driver_override(rdi, offsetof(struct ib_device, map_phys_fmr), diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig new file mode 100644 index 000000000000..1e4e628fe7b0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -0,0 +1,24 @@ +config RDMA_RXE + tristate "Software RDMA over Ethernet (RoCE) driver" + depends on INET && PCI && INFINIBAND + depends on NET_UDP_TUNNEL + ---help--- + This driver implements the InfiniBand RDMA transport over + the Linux network stack. It enables a system with a + standard Ethernet adapter to interoperate with a RoCE + adapter or with another system running the RXE driver. + Documentation on InfiniBand and RoCE can be downloaded at + www.infinibandta.org and www.openfabrics.org. (See also + siw which is a similar software driver for iWARP.) + + The driver is split into two layers, one interfaces with the + Linux RDMA stack and implements a kernel or user space + verbs API. The user space verbs API requires a support + library named librxe which is loaded by the generic user + space verbs API, libibverbs. The other layer interfaces + with the Linux network stack at layer 3. + + To configure and work with soft-RoCE driver please use the + following wiki page under "configure Soft-RoCE (RXE)" section: + + https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile new file mode 100644 index 000000000000..3b3fb9d1c470 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Makefile @@ -0,0 +1,24 @@ +obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o + +rdma_rxe-y := \ + rxe.o \ + rxe_comp.o \ + rxe_req.o \ + rxe_resp.o \ + rxe_recv.o \ + rxe_pool.o \ + rxe_queue.o \ + rxe_verbs.o \ + rxe_av.o \ + rxe_srq.o \ + rxe_qp.o \ + rxe_cq.o \ + rxe_mr.o \ + rxe_dma.o \ + rxe_opcode.o \ + rxe_mmap.o \ + rxe_icrc.o \ + rxe_mcast.o \ + rxe_task.o \ + rxe_net.o \ + rxe_sysfs.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c new file mode 100644 index 000000000000..55f0e8f0ca79 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); +MODULE_DESCRIPTION("Soft RDMA transport"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.2"); + +/* free resources for all ports on a device */ +static void rxe_cleanup_ports(struct rxe_dev *rxe) +{ + kfree(rxe->port.pkey_tbl); + rxe->port.pkey_tbl = NULL; + +} + +/* free resources for a rxe device all objects created for this device must + * have been destroyed + */ +static void rxe_cleanup(struct rxe_dev *rxe) +{ + rxe_pool_cleanup(&rxe->uc_pool); + rxe_pool_cleanup(&rxe->pd_pool); + rxe_pool_cleanup(&rxe->ah_pool); + rxe_pool_cleanup(&rxe->srq_pool); + rxe_pool_cleanup(&rxe->qp_pool); + rxe_pool_cleanup(&rxe->cq_pool); + rxe_pool_cleanup(&rxe->mr_pool); + rxe_pool_cleanup(&rxe->mw_pool); + rxe_pool_cleanup(&rxe->mc_grp_pool); + rxe_pool_cleanup(&rxe->mc_elem_pool); + + rxe_cleanup_ports(rxe); +} + +/* called when all references have been dropped */ +void rxe_release(struct kref *kref) +{ + struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt); + + rxe_cleanup(rxe); + ib_dealloc_device(&rxe->ib_dev); +} + +void rxe_dev_put(struct rxe_dev *rxe) +{ + kref_put(&rxe->ref_cnt, rxe_release); +} +EXPORT_SYMBOL_GPL(rxe_dev_put); + +/* initialize rxe device parameters */ +static int rxe_init_device_param(struct rxe_dev *rxe) +{ + rxe->max_inline_data = RXE_MAX_INLINE_DATA; + + rxe->attr.fw_ver = RXE_FW_VER; + rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; + rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; + rxe->attr.vendor_id = RXE_VENDOR_ID; + rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; + rxe->attr.hw_ver = RXE_HW_VER; + rxe->attr.max_qp = RXE_MAX_QP; + rxe->attr.max_qp_wr = RXE_MAX_QP_WR; + rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; + rxe->attr.max_cq = RXE_MAX_CQ; + rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; + rxe->attr.max_mr = RXE_MAX_MR; + rxe->attr.max_pd = RXE_MAX_PD; + rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; + rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; + rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; + rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; + rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; + rxe->attr.atomic_cap = RXE_ATOMIC_CAP; + rxe->attr.max_ee = RXE_MAX_EE; + rxe->attr.max_rdd = RXE_MAX_RDD; + rxe->attr.max_mw = RXE_MAX_MW; + rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; + rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; + rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; + rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; + rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; + rxe->attr.max_ah = RXE_MAX_AH; + rxe->attr.max_fmr = RXE_MAX_FMR; + rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; + rxe->attr.max_srq = RXE_MAX_SRQ; + rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; + rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; + rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; + rxe->attr.max_pkeys = RXE_MAX_PKEYS; + rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + + rxe->max_ucontext = RXE_MAX_UCONTEXT; + + return 0; +} + +/* initialize port attributes */ +static int rxe_init_port_param(struct rxe_port *port) +{ + port->attr.state = RXE_PORT_STATE; + port->attr.max_mtu = RXE_PORT_MAX_MTU; + port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; + port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; + port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; + port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; + port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; + port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; + port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; + port->attr.lid = RXE_PORT_LID; + port->attr.sm_lid = RXE_PORT_SM_LID; + port->attr.lmc = RXE_PORT_LMC; + port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; + port->attr.sm_sl = RXE_PORT_SM_SL; + port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; + port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; + port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; + port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; + port->attr.phys_state = RXE_PORT_PHYS_STATE; + port->mtu_cap = + ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU); + port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); + + return 0; +} + +/* initialize port state, note IB convention that HCA ports are always + * numbered from 1 + */ +static int rxe_init_ports(struct rxe_dev *rxe) +{ + struct rxe_port *port = &rxe->port; + + rxe_init_port_param(port); + + if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) + return -EINVAL; + + port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, + sizeof(*port->pkey_tbl), GFP_KERNEL); + + if (!port->pkey_tbl) + return -ENOMEM; + + port->pkey_tbl[0] = 0xffff; + port->port_guid = rxe->ifc_ops->port_guid(rxe); + + spin_lock_init(&port->port_lock); + + return 0; +} + +/* init pools of managed objects */ +static int rxe_init_pools(struct rxe_dev *rxe) +{ + int err; + + err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, + rxe->max_ucontext); + if (err) + goto err1; + + err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, + rxe->attr.max_pd); + if (err) + goto err2; + + err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, + rxe->attr.max_ah); + if (err) + goto err3; + + err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, + rxe->attr.max_srq); + if (err) + goto err4; + + err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, + rxe->attr.max_qp); + if (err) + goto err5; + + err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, + rxe->attr.max_cq); + if (err) + goto err6; + + err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, + rxe->attr.max_mr); + if (err) + goto err7; + + err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, + rxe->attr.max_mw); + if (err) + goto err8; + + err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, + rxe->attr.max_mcast_grp); + if (err) + goto err9; + + err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, + rxe->attr.max_total_mcast_qp_attach); + if (err) + goto err10; + + return 0; + +err10: + rxe_pool_cleanup(&rxe->mc_grp_pool); +err9: + rxe_pool_cleanup(&rxe->mw_pool); +err8: + rxe_pool_cleanup(&rxe->mr_pool); +err7: + rxe_pool_cleanup(&rxe->cq_pool); +err6: + rxe_pool_cleanup(&rxe->qp_pool); +err5: + rxe_pool_cleanup(&rxe->srq_pool); +err4: + rxe_pool_cleanup(&rxe->ah_pool); +err3: + rxe_pool_cleanup(&rxe->pd_pool); +err2: + rxe_pool_cleanup(&rxe->uc_pool); +err1: + return err; +} + +/* initialize rxe device state */ +static int rxe_init(struct rxe_dev *rxe) +{ + int err; + + /* init default device parameters */ + rxe_init_device_param(rxe); + + err = rxe_init_ports(rxe); + if (err) + goto err1; + + err = rxe_init_pools(rxe); + if (err) + goto err2; + + /* init pending mmap list */ + spin_lock_init(&rxe->mmap_offset_lock); + spin_lock_init(&rxe->pending_lock); + INIT_LIST_HEAD(&rxe->pending_mmaps); + INIT_LIST_HEAD(&rxe->list); + + mutex_init(&rxe->usdev_lock); + + return 0; + +err2: + rxe_cleanup_ports(rxe); +err1: + return err; +} + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) +{ + struct rxe_port *port = &rxe->port; + enum ib_mtu mtu; + + mtu = eth_mtu_int_to_enum(ndev_mtu); + + /* Make sure that new MTU in range */ + mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; + + port->attr.active_mtu = mtu; + port->mtu_cap = ib_mtu_enum_to_int(mtu); + + return 0; +} +EXPORT_SYMBOL(rxe_set_mtu); + +/* called by ifc layer to create new rxe device. + * The caller should allocate memory for rxe by calling ib_alloc_device. + */ +int rxe_add(struct rxe_dev *rxe, unsigned int mtu) +{ + int err; + + kref_init(&rxe->ref_cnt); + + err = rxe_init(rxe); + if (err) + goto err1; + + err = rxe_set_mtu(rxe, mtu); + if (err) + goto err1; + + err = rxe_register_device(rxe); + if (err) + goto err1; + + return 0; + +err1: + rxe_dev_put(rxe); + return err; +} +EXPORT_SYMBOL(rxe_add); + +/* called by the ifc layer to remove a device */ +void rxe_remove(struct rxe_dev *rxe) +{ + rxe_unregister_device(rxe); + + rxe_dev_put(rxe); +} +EXPORT_SYMBOL(rxe_remove); + +static int __init rxe_module_init(void) +{ + int err; + + /* initialize slab caches for managed objects */ + err = rxe_cache_init(); + if (err) { + pr_err("rxe: unable to init object pools\n"); + return err; + } + + err = rxe_net_init(); + if (err) { + pr_err("rxe: unable to init\n"); + rxe_cache_exit(); + return err; + } + pr_info("rxe: loaded\n"); + + return 0; +} + +static void __exit rxe_module_exit(void) +{ + rxe_remove_all(); + rxe_net_exit(); + rxe_cache_exit(); + + pr_info("rxe: unloaded\n"); +} + +module_init(rxe_module_init); +module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h new file mode 100644 index 000000000000..12c71c549f97 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_H +#define RXE_H + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/crc32.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> + +#include "rxe_net.h" +#include "rxe_opcode.h" +#include "rxe_hdr.h" +#include "rxe_param.h" +#include "rxe_verbs.h" + +#define RXE_UVERBS_ABI_VERSION (1) + +#define IB_PHYS_STATE_LINK_UP (5) +#define IB_PHYS_STATE_LINK_DOWN (3) + +#define RXE_ROCE_V2_SPORT (0xc000) + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); + +int rxe_add(struct rxe_dev *rxe, unsigned int mtu); +void rxe_remove(struct rxe_dev *rxe); +void rxe_remove_all(void); + +int rxe_rcv(struct sk_buff *skb); + +void rxe_dev_put(struct rxe_dev *rxe); +struct rxe_dev *net_to_rxe(struct net_device *ndev); +struct rxe_dev *get_rxe_by_name(const char* name); + +void rxe_port_up(struct rxe_dev *rxe); +void rxe_port_down(struct rxe_dev *rxe); + +#endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c new file mode 100644 index 000000000000..5c9474212d4e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) +{ + struct rxe_port *port; + + if (attr->port_num != 1) { + pr_info("rxe: invalid port_num = %d\n", attr->port_num); + return -EINVAL; + } + + port = &rxe->port; + + if (attr->ah_flags & IB_AH_GRH) { + if (attr->grh.sgid_index > port->attr.gid_tbl_len) { + pr_info("rxe: invalid sgid index = %d\n", + attr->grh.sgid_index); + return -EINVAL; + } + } + + return 0; +} + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr) +{ + memset(av, 0, sizeof(*av)); + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); + av->port_num = port_num; + return 0; +} + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr) +{ + memcpy(&attr->grh, &av->grh, sizeof(av->grh)); + attr->port_num = av->port_num; + return 0; +} + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid) +{ + rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); + rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid); + av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + + return 0; +} + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +{ + if (!pkt || !pkt->qp) + return NULL; + + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) + return &pkt->qp->pri_av; + + return (pkt->wqe) ? &pkt->wqe->av : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c new file mode 100644 index 000000000000..36f67de44095 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +enum comp_state { + COMPST_GET_ACK, + COMPST_GET_WQE, + COMPST_COMP_WQE, + COMPST_COMP_ACK, + COMPST_CHECK_PSN, + COMPST_CHECK_ACK, + COMPST_READ, + COMPST_ATOMIC, + COMPST_WRITE_SEND, + COMPST_UPDATE_COMP, + COMPST_ERROR_RETRY, + COMPST_RNR_RETRY, + COMPST_ERROR, + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ + COMPST_DONE, /* The completer finished successflly */ +}; + +static char *comp_state_name[] = { + [COMPST_GET_ACK] = "GET ACK", + [COMPST_GET_WQE] = "GET WQE", + [COMPST_COMP_WQE] = "COMP WQE", + [COMPST_COMP_ACK] = "COMP ACK", + [COMPST_CHECK_PSN] = "CHECK PSN", + [COMPST_CHECK_ACK] = "CHECK ACK", + [COMPST_READ] = "READ", + [COMPST_ATOMIC] = "ATOMIC", + [COMPST_WRITE_SEND] = "WRITE/SEND", + [COMPST_UPDATE_COMP] = "UPDATE COMP", + [COMPST_ERROR_RETRY] = "ERROR RETRY", + [COMPST_RNR_RETRY] = "RNR RETRY", + [COMPST_ERROR] = "ERROR", + [COMPST_EXIT] = "EXIT", + [COMPST_DONE] = "DONE", +}; + +static unsigned long rnrnak_usec[32] = { + [IB_RNR_TIMER_655_36] = 655360, + [IB_RNR_TIMER_000_01] = 10, + [IB_RNR_TIMER_000_02] = 20, + [IB_RNR_TIMER_000_03] = 30, + [IB_RNR_TIMER_000_04] = 40, + [IB_RNR_TIMER_000_06] = 60, + [IB_RNR_TIMER_000_08] = 80, + [IB_RNR_TIMER_000_12] = 120, + [IB_RNR_TIMER_000_16] = 160, + [IB_RNR_TIMER_000_24] = 240, + [IB_RNR_TIMER_000_32] = 320, + [IB_RNR_TIMER_000_48] = 480, + [IB_RNR_TIMER_000_64] = 640, + [IB_RNR_TIMER_000_96] = 960, + [IB_RNR_TIMER_001_28] = 1280, + [IB_RNR_TIMER_001_92] = 1920, + [IB_RNR_TIMER_002_56] = 2560, + [IB_RNR_TIMER_003_84] = 3840, + [IB_RNR_TIMER_005_12] = 5120, + [IB_RNR_TIMER_007_68] = 7680, + [IB_RNR_TIMER_010_24] = 10240, + [IB_RNR_TIMER_015_36] = 15360, + [IB_RNR_TIMER_020_48] = 20480, + [IB_RNR_TIMER_030_72] = 30720, + [IB_RNR_TIMER_040_96] = 40960, + [IB_RNR_TIMER_061_44] = 61410, + [IB_RNR_TIMER_081_92] = 81920, + [IB_RNR_TIMER_122_88] = 122880, + [IB_RNR_TIMER_163_84] = 163840, + [IB_RNR_TIMER_245_76] = 245760, + [IB_RNR_TIMER_327_68] = 327680, + [IB_RNR_TIMER_491_52] = 491520, +}; + +static inline unsigned long rnrnak_jiffies(u8 timeout) +{ + return max_t(unsigned long, + usecs_to_jiffies(rnrnak_usec[timeout]), 1); +} + +static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; + case IB_WR_SEND: return IB_WC_SEND; + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; + case IB_WR_LSO: return IB_WC_LSO; + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: return IB_WC_REG_MR; + + default: + return 0xff; + } +} + +void retransmit_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + if (qp->valid) { + qp->comp.timeout = 1; + rxe_run_task(&qp->comp.task, 1); + } +} + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + + skb_queue_tail(&qp->resp_pkts, skb); + + must_sched = skb_queue_len(&qp->resp_pkts) > 1; + rxe_run_task(&qp->comp.task, must_sched); +} + +static inline enum comp_state get_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe **wqe_p) +{ + struct rxe_send_wqe *wqe; + + /* we come here whether or not we found a response packet to see if + * there are any posted WQEs + */ + wqe = queue_head(qp->sq.queue); + *wqe_p = wqe; + + /* no WQE or requester has not started it yet */ + if (!wqe || wqe->state == wqe_state_posted) + return pkt ? COMPST_DONE : COMPST_EXIT; + + /* WQE does not require an ack */ + if (wqe->state == wqe_state_done) + return COMPST_COMP_WQE; + + /* WQE caused an error */ + if (wqe->state == wqe_state_error) + return COMPST_ERROR; + + /* we have a WQE, if we also have an ack check its PSN */ + return pkt ? COMPST_CHECK_PSN : COMPST_EXIT; +} + +static inline void reset_retry_counters(struct rxe_qp *qp) +{ + qp->comp.retry_cnt = qp->attr.retry_cnt; + qp->comp.rnr_retry = qp->attr.rnr_retry; +} + +static inline enum comp_state check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + s32 diff; + + /* check to see if response is past the oldest WQE. if it is, complete + * send/write or error read/atomic + */ + diff = psn_compare(pkt->psn, wqe->last_psn); + if (diff > 0) { + if (wqe->state == wqe_state_pending) { + if (wqe->mask & WR_ATOMIC_OR_READ_MASK) + return COMPST_ERROR_RETRY; + + reset_retry_counters(qp); + return COMPST_COMP_WQE; + } else { + return COMPST_DONE; + } + } + + /* compare response packet to expected response */ + diff = psn_compare(pkt->psn, qp->comp.psn); + if (diff < 0) { + /* response is most likely a retried packet if it matches an + * uncompleted WQE go complete it else ignore it + */ + if (pkt->psn == wqe->last_psn) + return COMPST_COMP_ACK; + else + return COMPST_DONE; + } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { + return COMPST_ERROR_RETRY; + } else { + return COMPST_CHECK_ACK; + } +} + +static inline enum comp_state check_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned int mask = pkt->mask; + u8 syn; + + /* Check the sequence only */ + switch (qp->comp.opcode) { + case -1: + /* Will catch all *_ONLY cases. */ + if (!(mask & RXE_START_MASK)) + return COMPST_ERROR; + + break; + + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && + pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + return COMPST_ERROR; + } + break; + default: + WARN_ON(1); + } + + /* Check operation validity. */ + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE + * doesn't have an AETH) + */ + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (wqe->wr.opcode != IB_WR_RDMA_READ && + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + return COMPST_ERROR; + } + reset_retry_counters(qp); + return COMPST_READ; + + case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP && + wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD) + return COMPST_ERROR; + reset_retry_counters(qp); + return COMPST_ATOMIC; + + case IB_OPCODE_RC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + switch (syn & AETH_TYPE_MASK) { + case AETH_ACK: + reset_retry_counters(qp); + return COMPST_WRITE_SEND; + + case AETH_RNR_NAK: + return COMPST_RNR_RETRY; + + case AETH_NAK: + switch (syn) { + case AETH_NAK_PSN_SEQ_ERROR: + /* a nak implicitly acks all packets with psns + * before + */ + if (psn_compare(pkt->psn, qp->comp.psn) > 0) { + qp->comp.psn = pkt->psn; + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + return COMPST_ERROR_RETRY; + + case AETH_NAK_INVALID_REQ: + wqe->status = IB_WC_REM_INV_REQ_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_ACC_ERR: + wqe->status = IB_WC_REM_ACCESS_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_OP_ERR: + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + + default: + pr_warn("unexpected nak %x\n", syn); + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + } + + default: + return COMPST_ERROR; + } + break; + + default: + pr_warn("unexpected opcode\n"); + } + + return COMPST_ERROR; +} + +static inline enum comp_state do_read(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, payload_addr(pkt), + payload_size(pkt), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + + if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) + return COMPST_COMP_ACK; + else + return COMPST_UPDATE_COMP; +} + +static inline enum comp_state do_atomic(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + u64 atomic_orig = atmack_orig(pkt); + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, &atomic_orig, + sizeof(u64), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + else + return COMPST_COMP_ACK; +} + +static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_cqe *cqe) +{ + memset(cqe, 0, sizeof(*cqe)); + + if (!qp->is_user) { + struct ib_wc *wc = &cqe->ibwc; + + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + wc->qp = &qp->ibqp; + } else { + struct ib_uverbs_wc *uwc = &cqe->uibwc; + + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + uwc->qp_num = qp->ibqp.qp_num; + } +} + +static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_cqe cqe; + + if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + (qp->req.state == QP_STATE_ERROR)) { + make_send_cqe(qp, wqe, &cqe); + rxe_cq_post(qp->scq, &cqe, 0); + } + + advance_consumer(qp->sq.queue); + + /* + * we completed something so let req run again + * if it is trying to fence + */ + if (qp->req.wait_fence) { + qp->req.wait_fence = 0; + rxe_run_task(&qp->req.task, 1); + } +} + +static inline enum comp_state complete_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned long flags; + + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* state_lock used by requester & completer */ + spin_lock_irqsave(&qp->state_lock, flags); + if ((qp->req.state == QP_STATE_DRAIN) && + (qp->comp.psn == qp->req.psn)) { + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } else { + spin_unlock_irqrestore(&qp->state_lock, flags); + } + } + + do_complete(qp, wqe); + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + return COMPST_UPDATE_COMP; + else + return COMPST_DONE; +} + +static inline enum comp_state complete_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + qp->comp.opcode = -1; + + if (pkt) { + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + do_complete(qp, wqe); + + return COMPST_GET_WQE; +} + +int rxe_completer(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_send_wqe *wqe = wqe; + struct sk_buff *skb = NULL; + struct rxe_pkt_info *pkt = NULL; + enum comp_state state; + + if (!qp->valid) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->req.state == QP_STATE_ERROR) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while ((wqe = queue_head(qp->sq.queue))) { + wqe->status = IB_WC_WR_FLUSH_ERR; + do_complete(qp, wqe); + } + + goto exit; + } + + if (qp->req.state == QP_STATE_RESET) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->comp.timeout) { + qp->comp.timeout_retry = 1; + qp->comp.timeout = 0; + } else { + qp->comp.timeout_retry = 0; + } + + if (qp->req.need_retry) + goto exit; + + state = COMPST_GET_ACK; + + while (1) { + pr_debug("state = %s\n", comp_state_name[state]); + switch (state) { + case COMPST_GET_ACK: + skb = skb_dequeue(&qp->resp_pkts); + if (skb) { + pkt = SKB_TO_PKT(skb); + qp->comp.timeout_retry = 0; + } + state = COMPST_GET_WQE; + break; + + case COMPST_GET_WQE: + state = get_wqe(qp, pkt, &wqe); + break; + + case COMPST_CHECK_PSN: + state = check_psn(qp, pkt, wqe); + break; + + case COMPST_CHECK_ACK: + state = check_ack(qp, pkt, wqe); + break; + + case COMPST_READ: + state = do_read(qp, pkt, wqe); + break; + + case COMPST_ATOMIC: + state = do_atomic(qp, pkt, wqe); + break; + + case COMPST_WRITE_SEND: + if (wqe->state == wqe_state_pending && + wqe->last_psn == pkt->psn) + state = COMPST_COMP_ACK; + else + state = COMPST_UPDATE_COMP; + break; + + case COMPST_COMP_ACK: + state = complete_ack(qp, pkt, wqe); + break; + + case COMPST_COMP_WQE: + state = complete_wqe(qp, pkt, wqe); + break; + + case COMPST_UPDATE_COMP: + if (pkt->mask & RXE_END_MASK) + qp->comp.opcode = -1; + else + qp->comp.opcode = pkt->opcode; + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + + state = COMPST_DONE; + break; + + case COMPST_DONE: + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto done; + + case COMPST_EXIT: + if (qp->comp.timeout_retry && wqe) { + state = COMPST_ERROR_RETRY; + break; + } + + /* re reset the timeout counter if + * (1) QP is type RC + * (2) the QP is alive + * (3) there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * (4) the timeout parameter is set + */ + if ((qp_type(qp) == IB_QPT_RC) && + (qp->req.state == QP_STATE_READY) && + (psn_compare(qp->req.psn, qp->comp.psn) > 0) && + qp->qp_timeout_jiffies) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + goto exit; + + case COMPST_ERROR_RETRY: + /* we come here if the retry timer fired and we did + * not receive a response packet. try to retry the send + * queue if that makes sense and the limits have not + * been exceeded. remember that some timeouts are + * spurious since we do not reset the timer but kick + * it down the road or let it expire + */ + + /* there is nothing to retry in this case */ + if (!wqe || (wqe->state == wqe_state_posted)) + goto exit; + + if (qp->comp.retry_cnt > 0) { + if (qp->comp.retry_cnt != 7) + qp->comp.retry_cnt--; + + /* no point in retrying if we have already + * seen the last ack that the requester could + * have caused + */ + if (psn_compare(qp->req.psn, + qp->comp.psn) > 0) { + /* tell the requester to retry the + * send send queue next time around + */ + qp->req.need_retry = 1; + rxe_run_task(&qp->req.task, 1); + } + goto exit; + } else { + wqe->status = IB_WC_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_RNR_RETRY: + if (qp->comp.rnr_retry > 0) { + if (qp->comp.rnr_retry != 7) + qp->comp.rnr_retry--; + + qp->req.need_retry = 1; + pr_debug("set rnr nak timer\n"); + mod_timer(&qp->rnr_nak_timer, + jiffies + rnrnak_jiffies(aeth_syn(pkt) + & ~AETH_TYPE_MASK)); + goto exit; + } else { + wqe->status = IB_WC_RNR_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_ERROR: + do_complete(qp, wqe); + rxe_qp_error(qp); + goto exit; + } + } + +exit: + /* we come here if we are done with processing and want the task to + * exit from the loop calling us + */ + return -EAGAIN; + +done: + /* we come here if we have processed a packet we want the task to call + * us again to see if there is anything else to do + */ + return 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c new file mode 100644 index 000000000000..e5e6a5e7dee9 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, + int cqe, int comp_vector, struct ib_udata *udata) +{ + int count; + + if (cqe <= 0) { + pr_warn("cqe(%d) <= 0\n", cqe); + goto err1; + } + + if (cqe > rxe->attr.max_cqe) { + pr_warn("cqe(%d) > max_cqe(%d)\n", + cqe, rxe->attr.max_cqe); + goto err1; + } + + if (cq) { + count = queue_count(cq->queue); + if (cqe < count) { + pr_warn("cqe(%d) < current # elements in queue (%d)", + cqe, count); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static void rxe_send_complete(unsigned long data) +{ + struct rxe_cq *cq = (struct rxe_cq *)data; + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + int err; + + cq->queue = rxe_queue_init(rxe, &cqe, + sizeof(struct rxe_cqe)); + if (!cq->queue) { + pr_warn("unable to create cq\n"); + return -ENOMEM; + } + + err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, + cq->queue->buf_size, &cq->queue->ip); + if (err) { + kvfree(cq->queue->buf); + kfree(cq->queue); + return err; + } + + if (udata) + cq->is_user = 1; + + tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); + + spin_lock_init(&cq->cq_lock); + cq->ibcq.cqe = cqe; + return 0; +} + +int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata) +{ + int err; + + err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, + sizeof(struct rxe_cqe), + cq->queue->ip ? cq->queue->ip->context : NULL, + udata, NULL, &cq->cq_lock); + if (!err) + cq->ibcq.cqe = cqe; + + return err; +} + +int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) +{ + struct ib_event ev; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + + if (unlikely(queue_full(cq->queue))) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + if (cq->ibcq.event_handler) { + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + + return -EBUSY; + } + + memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); + + /* make sure all changes to the CQ are written before we update the + * producer pointer + */ + smp_wmb(); + + advance_producer(cq->queue); + spin_unlock_irqrestore(&cq->cq_lock, flags); + + if ((cq->notify == IB_CQ_NEXT_COMP) || + (cq->notify == IB_CQ_SOLICITED && solicited)) { + cq->notify = 0; + tasklet_schedule(&cq->comp_task); + } + + return 0; +} + +void rxe_cq_cleanup(void *arg) +{ + struct rxe_cq *cq = arg; + + if (cq->queue) + rxe_queue_cleanup(cq->queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c new file mode 100644 index 000000000000..7634c1a81b2b --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +#define DMA_BAD_ADDER ((u64)0) + +static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == DMA_BAD_ADDER; +} + +static u64 rxe_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); + return (uintptr_t)cpu_addr; +} + +static void rxe_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static u64 rxe_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + u64 addr; + + WARN_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = DMA_BAD_ADDER; + goto done; + } + + addr = (uintptr_t)page_address(page); + if (addr) + addr += offset; + +done: + return addr; +} + +static void rxe_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + WARN_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (uintptr_t)page_address(sg_page(sg)); + if (!addr) { + ret = 0; + break; + } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + } + + return ret; +} + +static void rxe_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static void rxe_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void rxe_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + + if (dma_handle) + *dma_handle = (uintptr_t)addr; + + return addr; +} + +static void rxe_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops rxe_dma_mapping_ops = { + .mapping_error = rxe_mapping_error, + .map_single = rxe_dma_map_single, + .unmap_single = rxe_dma_unmap_single, + .map_page = rxe_dma_map_page, + .unmap_page = rxe_dma_unmap_page, + .map_sg = rxe_map_sg, + .unmap_sg = rxe_unmap_sg, + .sync_single_for_cpu = rxe_sync_single_for_cpu, + .sync_single_for_device = rxe_sync_single_for_device, + .alloc_coherent = rxe_dma_alloc_coherent, + .free_coherent = rxe_dma_free_coherent +}; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h new file mode 100644 index 000000000000..d57b5e956ceb --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -0,0 +1,952 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_HDR_H +#define RXE_HDR_H + +/* extracted information about a packet carried in an sk_buff struct fits in + * the skbuff cb array. Must be at most 48 bytes. stored in control block of + * sk_buff for received packets. + */ +struct rxe_pkt_info { + struct rxe_dev *rxe; /* device that owns packet */ + struct rxe_qp *qp; /* qp that owns packet */ + struct rxe_send_wqe *wqe; /* send wqe */ + u8 *hdr; /* points to bth */ + u32 mask; /* useful info about pkt */ + u32 psn; /* bth psn of packet */ + u16 pkey_index; /* partition of pkt */ + u16 paylen; /* length of bth - icrc */ + u8 port_num; /* port pkt received on */ + u8 opcode; /* bth opcode of packet */ + u8 offset; /* bth offset from pkt->hdr */ +}; + +/* Macros should be used only for received skb */ +#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb) +#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb) + +/* + * IBA header types and methods + * + * Some of these are for reference and completeness only since + * rxe does not currently support RD transport + * most of this could be moved into IB core. ib_pack.h has + * part of this but is incomplete + * + * Header specific routines to insert/extract values to/from headers + * the routines that are named __hhh_(set_)fff() take a pointer to a + * hhh header and get(set) the fff field. The routines named + * hhh_(set_)fff take a packet info struct and find the + * header and field based on the opcode in the packet. + * Conversion to/from network byte order from cpu order is also done. + */ + +#define RXE_ICRC_SIZE (4) +#define RXE_MAX_HDR_LENGTH (80) + +/****************************************************************************** + * Base Transport Header + ******************************************************************************/ +struct rxe_bth { + u8 opcode; + u8 flags; + __be16 pkey; + __be32 qpn; + __be32 apsn; +}; + +#define BTH_TVER (0) +#define BTH_DEF_PKEY (0xffff) + +#define BTH_SE_MASK (0x80) +#define BTH_MIG_MASK (0x40) +#define BTH_PAD_MASK (0x30) +#define BTH_TVER_MASK (0x0f) +#define BTH_FECN_MASK (0x80000000) +#define BTH_BECN_MASK (0x40000000) +#define BTH_RESV6A_MASK (0x3f000000) +#define BTH_QPN_MASK (0x00ffffff) +#define BTH_ACK_MASK (0x80000000) +#define BTH_RESV7_MASK (0x7f000000) +#define BTH_PSN_MASK (0x00ffffff) + +static inline u8 __bth_opcode(void *arg) +{ + struct rxe_bth *bth = arg; + + return bth->opcode; +} + +static inline void __bth_set_opcode(void *arg, u8 opcode) +{ + struct rxe_bth *bth = arg; + + bth->opcode = opcode; +} + +static inline u8 __bth_se(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_SE_MASK & bth->flags); +} + +static inline void __bth_set_se(void *arg, int se) +{ + struct rxe_bth *bth = arg; + + if (se) + bth->flags |= BTH_SE_MASK; + else + bth->flags &= ~BTH_SE_MASK; +} + +static inline u8 __bth_mig(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_MIG_MASK & bth->flags); +} + +static inline void __bth_set_mig(void *arg, u8 mig) +{ + struct rxe_bth *bth = arg; + + if (mig) + bth->flags |= BTH_MIG_MASK; + else + bth->flags &= ~BTH_MIG_MASK; +} + +static inline u8 __bth_pad(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_PAD_MASK & bth->flags) >> 4; +} + +static inline void __bth_set_pad(void *arg, u8 pad) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_PAD_MASK & (pad << 4)) | + (~BTH_PAD_MASK & bth->flags); +} + +static inline u8 __bth_tver(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_TVER_MASK & bth->flags; +} + +static inline void __bth_set_tver(void *arg, u8 tver) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_TVER_MASK & tver) | + (~BTH_TVER_MASK & bth->flags); +} + +static inline u16 __bth_pkey(void *arg) +{ + struct rxe_bth *bth = arg; + + return be16_to_cpu(bth->pkey); +} + +static inline void __bth_set_pkey(void *arg, u16 pkey) +{ + struct rxe_bth *bth = arg; + + bth->pkey = cpu_to_be16(pkey); +} + +static inline u32 __bth_qpn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_QPN_MASK & be32_to_cpu(bth->qpn); +} + +static inline void __bth_set_qpn(void *arg, u32 qpn) +{ + struct rxe_bth *bth = arg; + u32 resvqpn = be32_to_cpu(bth->qpn); + + bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) | + (~BTH_QPN_MASK & resvqpn)); +} + +static inline int __bth_fecn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn); +} + +static inline void __bth_set_fecn(void *arg, int fecn) +{ + struct rxe_bth *bth = arg; + + if (fecn) + bth->qpn |= cpu_to_be32(BTH_FECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK); +} + +static inline int __bth_becn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn); +} + +static inline void __bth_set_becn(void *arg, int becn) +{ + struct rxe_bth *bth = arg; + + if (becn) + bth->qpn |= cpu_to_be32(BTH_BECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK); +} + +static inline u8 __bth_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24; +} + +static inline void __bth_set_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); +} + +static inline int __bth_ack(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn); +} + +static inline void __bth_set_ack(void *arg, int ack) +{ + struct rxe_bth *bth = arg; + + if (ack) + bth->apsn |= cpu_to_be32(BTH_ACK_MASK); + else + bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK); +} + +static inline void __bth_set_resv7(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK); +} + +static inline u32 __bth_psn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_PSN_MASK & be32_to_cpu(bth->apsn); +} + +static inline void __bth_set_psn(void *arg, u32 psn) +{ + struct rxe_bth *bth = arg; + u32 apsn = be32_to_cpu(bth->apsn); + + bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) | + (~BTH_PSN_MASK & apsn)); +} + +static inline u8 bth_opcode(struct rxe_pkt_info *pkt) +{ + return __bth_opcode(pkt->hdr + pkt->offset); +} + +static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) +{ + __bth_set_opcode(pkt->hdr + pkt->offset, opcode); +} + +static inline u8 bth_se(struct rxe_pkt_info *pkt) +{ + return __bth_se(pkt->hdr + pkt->offset); +} + +static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) +{ + __bth_set_se(pkt->hdr + pkt->offset, se); +} + +static inline u8 bth_mig(struct rxe_pkt_info *pkt) +{ + return __bth_mig(pkt->hdr + pkt->offset); +} + +static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) +{ + __bth_set_mig(pkt->hdr + pkt->offset, mig); +} + +static inline u8 bth_pad(struct rxe_pkt_info *pkt) +{ + return __bth_pad(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) +{ + __bth_set_pad(pkt->hdr + pkt->offset, pad); +} + +static inline u8 bth_tver(struct rxe_pkt_info *pkt) +{ + return __bth_tver(pkt->hdr + pkt->offset); +} + +static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) +{ + __bth_set_tver(pkt->hdr + pkt->offset, tver); +} + +static inline u16 bth_pkey(struct rxe_pkt_info *pkt) +{ + return __bth_pkey(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) +{ + __bth_set_pkey(pkt->hdr + pkt->offset, pkey); +} + +static inline u32 bth_qpn(struct rxe_pkt_info *pkt) +{ + return __bth_qpn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) +{ + __bth_set_qpn(pkt->hdr + pkt->offset, qpn); +} + +static inline int bth_fecn(struct rxe_pkt_info *pkt) +{ + return __bth_fecn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) +{ + __bth_set_fecn(pkt->hdr + pkt->offset, fecn); +} + +static inline int bth_becn(struct rxe_pkt_info *pkt) +{ + return __bth_becn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) +{ + __bth_set_becn(pkt->hdr + pkt->offset, becn); +} + +static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) +{ + return __bth_resv6a(pkt->hdr + pkt->offset); +} + +static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) +{ + __bth_set_resv6a(pkt->hdr + pkt->offset); +} + +static inline int bth_ack(struct rxe_pkt_info *pkt) +{ + return __bth_ack(pkt->hdr + pkt->offset); +} + +static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) +{ + __bth_set_ack(pkt->hdr + pkt->offset, ack); +} + +static inline void bth_set_resv7(struct rxe_pkt_info *pkt) +{ + __bth_set_resv7(pkt->hdr + pkt->offset); +} + +static inline u32 bth_psn(struct rxe_pkt_info *pkt) +{ + return __bth_psn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) +{ + __bth_set_psn(pkt->hdr + pkt->offset, psn); +} + +static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, + int mig, int pad, u16 pkey, u32 qpn, int ack_req, + u32 psn) +{ + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + + bth->opcode = opcode; + bth->flags = (pad << 4) & BTH_PAD_MASK; + if (se) + bth->flags |= BTH_SE_MASK; + if (mig) + bth->flags |= BTH_MIG_MASK; + bth->pkey = cpu_to_be16(pkey); + bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK); + psn &= BTH_PSN_MASK; + if (ack_req) + psn |= BTH_ACK_MASK; + bth->apsn = cpu_to_be32(psn); +} + +/****************************************************************************** + * Reliable Datagram Extended Transport Header + ******************************************************************************/ +struct rxe_rdeth { + __be32 een; +}; + +#define RDETH_EEN_MASK (0x00ffffff) + +static inline u8 __rdeth_een(void *arg) +{ + struct rxe_rdeth *rdeth = arg; + + return RDETH_EEN_MASK & be32_to_cpu(rdeth->een); +} + +static inline void __rdeth_set_een(void *arg, u32 een) +{ + struct rxe_rdeth *rdeth = arg; + + rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een); +} + +static inline u8 rdeth_een(struct rxe_pkt_info *pkt) +{ + return __rdeth_een(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); +} + +static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) +{ + __rdeth_set_een(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); +} + +/****************************************************************************** + * Datagram Extended Transport Header + ******************************************************************************/ +struct rxe_deth { + __be32 qkey; + __be32 sqp; +}; + +#define GSI_QKEY (0x80010000) +#define DETH_SQP_MASK (0x00ffffff) + +static inline u32 __deth_qkey(void *arg) +{ + struct rxe_deth *deth = arg; + + return be32_to_cpu(deth->qkey); +} + +static inline void __deth_set_qkey(void *arg, u32 qkey) +{ + struct rxe_deth *deth = arg; + + deth->qkey = cpu_to_be32(qkey); +} + +static inline u32 __deth_sqp(void *arg) +{ + struct rxe_deth *deth = arg; + + return DETH_SQP_MASK & be32_to_cpu(deth->sqp); +} + +static inline void __deth_set_sqp(void *arg, u32 sqp) +{ + struct rxe_deth *deth = arg; + + deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp); +} + +static inline u32 deth_qkey(struct rxe_pkt_info *pkt) +{ + return __deth_qkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); +} + +static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) +{ + __deth_set_qkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); +} + +static inline u32 deth_sqp(struct rxe_pkt_info *pkt) +{ + return __deth_sqp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); +} + +static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) +{ + __deth_set_sqp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); +} + +/****************************************************************************** + * RDMA Extended Transport Header + ******************************************************************************/ +struct rxe_reth { + __be64 va; + __be32 rkey; + __be32 len; +}; + +static inline u64 __reth_va(void *arg) +{ + struct rxe_reth *reth = arg; + + return be64_to_cpu(reth->va); +} + +static inline void __reth_set_va(void *arg, u64 va) +{ + struct rxe_reth *reth = arg; + + reth->va = cpu_to_be64(va); +} + +static inline u32 __reth_rkey(void *arg) +{ + struct rxe_reth *reth = arg; + + return be32_to_cpu(reth->rkey); +} + +static inline void __reth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_reth *reth = arg; + + reth->rkey = cpu_to_be32(rkey); +} + +static inline u32 __reth_len(void *arg) +{ + struct rxe_reth *reth = arg; + + return be32_to_cpu(reth->len); +} + +static inline void __reth_set_len(void *arg, u32 len) +{ + struct rxe_reth *reth = arg; + + reth->len = cpu_to_be32(len); +} + +static inline u64 reth_va(struct rxe_pkt_info *pkt) +{ + return __reth_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) +{ + __reth_set_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); +} + +static inline u32 reth_rkey(struct rxe_pkt_info *pkt) +{ + return __reth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __reth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); +} + +static inline u32 reth_len(struct rxe_pkt_info *pkt) +{ + return __reth_len(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) +{ + __reth_set_len(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); +} + +/****************************************************************************** + * Atomic Extended Transport Header + ******************************************************************************/ +struct rxe_atmeth { + __be64 va; + __be32 rkey; + __be64 swap_add; + __be64 comp; +} __attribute__((__packed__)); + +static inline u64 __atmeth_va(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->va); +} + +static inline void __atmeth_set_va(void *arg, u64 va) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->va = cpu_to_be64(va); +} + +static inline u32 __atmeth_rkey(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be32_to_cpu(atmeth->rkey); +} + +static inline void __atmeth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->rkey = cpu_to_be32(rkey); +} + +static inline u64 __atmeth_swap_add(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->swap_add); +} + +static inline void __atmeth_set_swap_add(void *arg, u64 swap_add) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->swap_add = cpu_to_be64(swap_add); +} + +static inline u64 __atmeth_comp(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->comp); +} + +static inline void __atmeth_set_comp(void *arg, u64 comp) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->comp = cpu_to_be64(comp); +} + +static inline u64 atmeth_va(struct rxe_pkt_info *pkt) +{ + return __atmeth_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) +{ + __atmeth_set_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); +} + +static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) +{ + return __atmeth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __atmeth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); +} + +static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) +{ + return __atmeth_swap_add(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) +{ + __atmeth_set_swap_add(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); +} + +static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) +{ + return __atmeth_comp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) +{ + __atmeth_set_comp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); +} + +/****************************************************************************** + * Ack Extended Transport Header + ******************************************************************************/ +struct rxe_aeth { + __be32 smsn; +}; + +#define AETH_SYN_MASK (0xff000000) +#define AETH_MSN_MASK (0x00ffffff) + +enum aeth_syndrome { + AETH_TYPE_MASK = 0xe0, + AETH_ACK = 0x00, + AETH_RNR_NAK = 0x20, + AETH_RSVD = 0x40, + AETH_NAK = 0x60, + AETH_ACK_UNLIMITED = 0x1f, + AETH_NAK_PSN_SEQ_ERROR = 0x60, + AETH_NAK_INVALID_REQ = 0x61, + AETH_NAK_REM_ACC_ERR = 0x62, + AETH_NAK_REM_OP_ERR = 0x63, + AETH_NAK_INV_RD_REQ = 0x64, +}; + +static inline u8 __aeth_syn(void *arg) +{ + struct rxe_aeth *aeth = arg; + + return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24; +} + +static inline void __aeth_set_syn(void *arg, u8 syn) +{ + struct rxe_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) | + (~AETH_SYN_MASK & smsn)); +} + +static inline u32 __aeth_msn(void *arg) +{ + struct rxe_aeth *aeth = arg; + + return AETH_MSN_MASK & be32_to_cpu(aeth->smsn); +} + +static inline void __aeth_set_msn(void *arg, u32 msn) +{ + struct rxe_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) | + (~AETH_MSN_MASK & smsn)); +} + +static inline u8 aeth_syn(struct rxe_pkt_info *pkt) +{ + return __aeth_syn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); +} + +static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) +{ + __aeth_set_syn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); +} + +static inline u32 aeth_msn(struct rxe_pkt_info *pkt) +{ + return __aeth_msn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); +} + +static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) +{ + __aeth_set_msn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); +} + +/****************************************************************************** + * Atomic Ack Extended Transport Header + ******************************************************************************/ +struct rxe_atmack { + __be64 orig; +}; + +static inline u64 __atmack_orig(void *arg) +{ + struct rxe_atmack *atmack = arg; + + return be64_to_cpu(atmack->orig); +} + +static inline void __atmack_set_orig(void *arg, u64 orig) +{ + struct rxe_atmack *atmack = arg; + + atmack->orig = cpu_to_be64(orig); +} + +static inline u64 atmack_orig(struct rxe_pkt_info *pkt) +{ + return __atmack_orig(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); +} + +static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) +{ + __atmack_set_orig(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); +} + +/****************************************************************************** + * Immediate Extended Transport Header + ******************************************************************************/ +struct rxe_immdt { + __be32 imm; +}; + +static inline __be32 __immdt_imm(void *arg) +{ + struct rxe_immdt *immdt = arg; + + return immdt->imm; +} + +static inline void __immdt_set_imm(void *arg, __be32 imm) +{ + struct rxe_immdt *immdt = arg; + + immdt->imm = imm; +} + +static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) +{ + return __immdt_imm(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); +} + +static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) +{ + __immdt_set_imm(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); +} + +/****************************************************************************** + * Invalidate Extended Transport Header + ******************************************************************************/ +struct rxe_ieth { + __be32 rkey; +}; + +static inline u32 __ieth_rkey(void *arg) +{ + struct rxe_ieth *ieth = arg; + + return be32_to_cpu(ieth->rkey); +} + +static inline void __ieth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_ieth *ieth = arg; + + ieth->rkey = cpu_to_be32(rkey); +} + +static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) +{ + return __ieth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); +} + +static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __ieth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); +} + +enum rxe_hdr_length { + RXE_BTH_BYTES = sizeof(struct rxe_bth), + RXE_DETH_BYTES = sizeof(struct rxe_deth), + RXE_IMMDT_BYTES = sizeof(struct rxe_immdt), + RXE_RETH_BYTES = sizeof(struct rxe_reth), + RXE_AETH_BYTES = sizeof(struct rxe_aeth), + RXE_ATMACK_BYTES = sizeof(struct rxe_atmack), + RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth), + RXE_IETH_BYTES = sizeof(struct rxe_ieth), + RXE_RDETH_BYTES = sizeof(struct rxe_rdeth), +}; + +static inline size_t header_size(struct rxe_pkt_info *pkt) +{ + return pkt->offset + rxe_opcode[pkt->opcode].length; +} + +static inline void *payload_addr(struct rxe_pkt_info *pkt) +{ + return pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; +} + +static inline size_t payload_size(struct rxe_pkt_info *pkt) +{ + return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD] + - bth_pad(pkt) - RXE_ICRC_SIZE; +} + +#endif /* RXE_HDR_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c new file mode 100644 index 000000000000..413b56b23a06 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* Compute a partial ICRC for all the IB transport headers. */ +u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) +{ + unsigned int bth_offset = 0; + struct iphdr *ip4h = NULL; + struct ipv6hdr *ip6h = NULL; + struct udphdr *udph; + struct rxe_bth *bth; + int crc; + int length; + int hdr_size = sizeof(struct udphdr) + + (skb->protocol == htons(ETH_P_IP) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)); + /* pseudo header buffer size is calculate using ipv6 header size since + * it is bigger than ipv4 + */ + u8 pshdr[sizeof(struct udphdr) + + sizeof(struct ipv6hdr) + + RXE_BTH_BYTES]; + + /* This seed is the result of computing a CRC with a seed of + * 0xfffffff and 8 bytes of 0xff representing a masked LRH. + */ + crc = 0xdebb20e3; + + if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ + memcpy(pshdr, ip_hdr(skb), hdr_size); + ip4h = (struct iphdr *)pshdr; + udph = (struct udphdr *)(ip4h + 1); + + ip4h->ttl = 0xff; + ip4h->check = CSUM_MANGLED_0; + ip4h->tos = 0xff; + } else { /* IPv6 */ + memcpy(pshdr, ipv6_hdr(skb), hdr_size); + ip6h = (struct ipv6hdr *)pshdr; + udph = (struct udphdr *)(ip6h + 1); + + memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl)); + ip6h->priority = 0xf; + ip6h->hop_limit = 0xff; + } + udph->check = CSUM_MANGLED_0; + + bth_offset += hdr_size; + + memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES); + bth = (struct rxe_bth *)&pshdr[bth_offset]; + + /* exclude bth.resv8a */ + bth->qpn |= cpu_to_be32(~BTH_QPN_MASK); + + length = hdr_size + RXE_BTH_BYTES; + crc = crc32_le(crc, pshdr, length); + + /* And finish to compute the CRC on the remainder of the headers. */ + crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES, + rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES); + return crc; +} diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h new file mode 100644 index 000000000000..4a5484ef604f --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_LOC_H +#define RXE_LOC_H + +/* rxe_av.c */ + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr); + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr); + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr); + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid); + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); + +/* rxe_cq.c */ +int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, + int cqe, int comp_vector, struct ib_udata *udata); + +int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata); + +int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata); + +int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); + +void rxe_cq_cleanup(void *arg); + +/* rxe_mcast.c */ +int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mc_grp **grp_p); + +int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_mc_grp *grp); + +int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + union ib_gid *mgid); + +void rxe_drop_all_mcast_groups(struct rxe_qp *qp); + +void rxe_mc_cleanup(void *arg); + +/* rxe_mmap.c */ +struct rxe_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + struct kref ref; + void *obj; + + struct mminfo info; +}; + +void rxe_mmap_release(struct kref *ref); + +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, + u32 size, + struct ib_ucontext *context, + void *obj); + +int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +/* rxe_mr.c */ +enum copy_direction { + to_mem_obj, + from_mem_obj, +}; + +int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, + int access, struct rxe_mem *mem); + +int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, + u64 length, u64 iova, int access, struct ib_udata *udata, + struct rxe_mem *mr); + +int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, + int max_pages, struct rxe_mem *mem); + +int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, + int length, enum copy_direction dir, u32 *crcp); + +int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access, + struct rxe_dma_info *dma, void *addr, int length, + enum copy_direction dir, u32 *crcp); + +void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length); + +enum lookup_type { + lookup_local, + lookup_remote, +}; + +struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, + enum lookup_type type); + +int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); + +int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, + u64 *page, int num_pages, u64 iova); + +void rxe_mem_cleanup(void *arg); + +int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); + +/* rxe_qp.c */ +int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); + +int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, + struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_pd *ibpd); + +int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); + +int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_attr *attr, int mask); + +int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata); + +int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); + +void rxe_qp_error(struct rxe_qp *qp); + +void rxe_qp_destroy(struct rxe_qp *qp); + +void rxe_qp_cleanup(void *arg); + +static inline int qp_num(struct rxe_qp *qp) +{ + return qp->ibqp.qp_num; +} + +static inline enum ib_qp_type qp_type(struct rxe_qp *qp) +{ + return qp->ibqp.qp_type; +} + +static inline enum ib_qp_state qp_state(struct rxe_qp *qp) +{ + return qp->attr.qp_state; +} + +static inline int qp_mtu(struct rxe_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + return qp->attr.path_mtu; + else + return RXE_PORT_MAX_MTU; +} + +static inline int rcv_wqe_size(int max_sge) +{ + return sizeof(struct rxe_recv_wqe) + + max_sge * sizeof(struct ib_sge); +} + +void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); + +static inline void rxe_advance_resp_resource(struct rxe_qp *qp) +{ + qp->resp.res_head++; + if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic)) + qp->resp.res_head = 0; +} + +void retransmit_timer(unsigned long data); +void rnr_nak_timer(unsigned long data); + +void dump_qp(struct rxe_qp *qp); + +/* rxe_srq.c */ +#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) + +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); + +int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata); + +int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, + struct ib_udata *udata); + +extern struct ib_dma_mapping_ops rxe_dma_mapping_ops; + +void rxe_release(struct kref *kref); + +int rxe_completer(void *arg); +int rxe_requester(void *arg); +int rxe_responder(void *arg); + +u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb); + +void rxe_resp_queue_pkt(struct rxe_dev *rxe, + struct rxe_qp *qp, struct sk_buff *skb); + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, + struct rxe_qp *qp, struct sk_buff *skb); + +static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp) +{ + return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; +} + +static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_pkt_info *pkt, struct sk_buff *skb) +{ + int err; + int is_request = pkt->mask & RXE_REQ_MASK; + + if ((is_request && (qp->req.state != QP_STATE_READY)) || + (!is_request && (qp->resp.state != QP_STATE_READY))) { + pr_info("Packet dropped. QP is not in ready state\n"); + goto drop; + } + + if (pkt->mask & RXE_LOOPBACK_MASK) { + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + err = rxe->ifc_ops->loopback(skb); + } else { + err = rxe->ifc_ops->send(rxe, pkt, skb); + } + + if (err) { + rxe->xmit_errors++; + return err; + } + + atomic_inc(&qp->skb_out); + + if ((qp_type(qp) != IB_QPT_RC) && + (pkt->mask & RXE_END_MASK)) { + pkt->wqe->state = wqe_state_done; + rxe_run_task(&qp->comp.task, 1); + } + + goto done; + +drop: + kfree_skb(skb); + err = 0; +done: + return err; +} + +#endif /* RXE_LOC_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c new file mode 100644 index 000000000000..fa95544ca7e0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mc_grp **grp_p) +{ + int err; + struct rxe_mc_grp *grp; + + if (rxe->attr.max_mcast_qp_attach == 0) { + err = -EINVAL; + goto err1; + } + + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + if (grp) + goto done; + + grp = rxe_alloc(&rxe->mc_grp_pool); + if (!grp) { + err = -ENOMEM; + goto err1; + } + + INIT_LIST_HEAD(&grp->qp_list); + spin_lock_init(&grp->mcg_lock); + grp->rxe = rxe; + + rxe_add_key(grp, mgid); + + err = rxe->ifc_ops->mcast_add(rxe, mgid); + if (err) + goto err2; + +done: + *grp_p = grp; + return 0; + +err2: + rxe_drop_ref(grp); +err1: + return err; +} + +int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_mc_grp *grp) +{ + int err; + struct rxe_mc_elem *elem; + + /* check to see of the qp is already a member of the group */ + spin_lock_bh(&qp->grp_lock); + spin_lock_bh(&grp->mcg_lock); + list_for_each_entry(elem, &grp->qp_list, qp_list) { + if (elem->qp == qp) { + err = 0; + goto out; + } + } + + if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { + err = -ENOMEM; + goto out; + } + + elem = rxe_alloc(&rxe->mc_elem_pool); + if (!elem) { + err = -ENOMEM; + goto out; + } + + /* each qp holds a ref on the grp */ + rxe_add_ref(grp); + + grp->num_qp++; + elem->qp = qp; + elem->grp = grp; + + list_add(&elem->qp_list, &grp->qp_list); + list_add(&elem->grp_list, &qp->grp_list); + + err = 0; +out: + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + return err; +} + +int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + union ib_gid *mgid) +{ + struct rxe_mc_grp *grp; + struct rxe_mc_elem *elem, *tmp; + + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + if (!grp) + goto err1; + + spin_lock_bh(&qp->grp_lock); + spin_lock_bh(&grp->mcg_lock); + + list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { + if (elem->qp == qp) { + list_del(&elem->qp_list); + list_del(&elem->grp_list); + grp->num_qp--; + + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + rxe_drop_ref(elem); + rxe_drop_ref(grp); /* ref held by QP */ + rxe_drop_ref(grp); /* ref from get_key */ + return 0; + } + } + + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + rxe_drop_ref(grp); /* ref from get_key */ +err1: + return -EINVAL; +} + +void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +{ + struct rxe_mc_grp *grp; + struct rxe_mc_elem *elem; + + while (1) { + spin_lock_bh(&qp->grp_lock); + if (list_empty(&qp->grp_list)) { + spin_unlock_bh(&qp->grp_lock); + break; + } + elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, + grp_list); + list_del(&elem->grp_list); + spin_unlock_bh(&qp->grp_lock); + + grp = elem->grp; + spin_lock_bh(&grp->mcg_lock); + list_del(&elem->qp_list); + grp->num_qp--; + spin_unlock_bh(&grp->mcg_lock); + rxe_drop_ref(grp); + rxe_drop_ref(elem); + } +} + +void rxe_mc_cleanup(void *arg) +{ + struct rxe_mc_grp *grp = arg; + struct rxe_dev *rxe = grp->rxe; + + rxe_drop_key(grp); + rxe->ifc_ops->mcast_delete(rxe, &grp->mgid); +} diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c new file mode 100644 index 000000000000..54b3c7c99eff --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +void rxe_mmap_release(struct kref *ref) +{ + struct rxe_mmap_info *ip = container_of(ref, + struct rxe_mmap_info, ref); + struct rxe_dev *rxe = to_rdev(ip->context->device); + + spin_lock_bh(&rxe->pending_lock); + + if (!list_empty(&ip->pending_mmaps)) + list_del(&ip->pending_mmaps); + + spin_unlock_bh(&rxe->pending_lock); + + vfree(ip->obj); /* buf */ + kfree(ip); +} + +/* + * open and close keep track of how many times the memory region is mapped, + * to avoid releasing it. + */ +static void rxe_vma_open(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void rxe_vma_close(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, rxe_mmap_release); +} + +static struct vm_operations_struct rxe_vm_ops = { + .open = rxe_vma_open, + .close = rxe_vma_close, +}; + +/** + * rxe_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct rxe_dev *rxe = to_rdev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct rxe_mmap_info *ip, *pp; + int ret; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_bh(&rxe->pending_lock); + list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) { + if (context != ip->context || (__u64)offset != ip->info.offset) + continue; + + /* Don't allow a mmap larger than the object. */ + if (size > ip->info.size) { + pr_err("mmap region is larger than the object!\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + } + + goto found_it; + } + pr_warn("unable to find pending mmap info\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + +found_it: + list_del_init(&ip->pending_mmaps); + spin_unlock_bh(&rxe->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) { + pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + goto done; + } + + vma->vm_ops = &rxe_vm_ops; + vma->vm_private_data = ip; + rxe_vma_open(vma); +done: + return ret; +} + +/* + * Allocate information for rxe_mmap + */ +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, + u32 size, + struct ib_ucontext *context, + void *obj) +{ + struct rxe_mmap_info *ip; + + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return NULL; + + size = PAGE_ALIGN(size); + + spin_lock_bh(&rxe->mmap_offset_lock); + + if (rxe->mmap_offset == 0) + rxe->mmap_offset = PAGE_SIZE; + + ip->info.offset = rxe->mmap_offset; + rxe->mmap_offset += size; + + spin_unlock_bh(&rxe->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->info.size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + + return ip; +} diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c new file mode 100644 index 000000000000..f3dab6574504 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* + * lfsr (linear feedback shift register) with period 255 + */ +static u8 rxe_get_key(void) +{ + static unsigned key = 1; + + key = key << 1; + + key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) + ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); + + key &= 0xff; + + return key; +} + +int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) +{ + switch (mem->type) { + case RXE_MEM_TYPE_DMA: + return 0; + + case RXE_MEM_TYPE_MR: + case RXE_MEM_TYPE_FMR: + return ((iova < mem->iova) || + ((iova + length) > (mem->iova + mem->length))) ? + -EFAULT : 0; + + default: + return -EFAULT; + } +} + +#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ + | IB_ACCESS_REMOTE_WRITE \ + | IB_ACCESS_REMOTE_ATOMIC) + +static void rxe_mem_init(int access, struct rxe_mem *mem) +{ + u32 lkey = mem->pelem.index << 8 | rxe_get_key(); + u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; + + if (mem->pelem.pool->type == RXE_TYPE_MR) { + mem->ibmr.lkey = lkey; + mem->ibmr.rkey = rkey; + } + + mem->lkey = lkey; + mem->rkey = rkey; + mem->state = RXE_MEM_STATE_INVALID; + mem->type = RXE_MEM_TYPE_NONE; + mem->map_shift = ilog2(RXE_BUF_PER_MAP); +} + +void rxe_mem_cleanup(void *arg) +{ + struct rxe_mem *mem = arg; + int i; + + if (mem->umem) + ib_umem_release(mem->umem); + + if (mem->map) { + for (i = 0; i < mem->num_map; i++) + kfree(mem->map[i]); + + kfree(mem->map); + } +} + +static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) +{ + int i; + int num_map; + struct rxe_map **map = mem->map; + + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; + + mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); + if (!mem->map) + goto err1; + + for (i = 0; i < num_map; i++) { + mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); + if (!mem->map[i]) + goto err2; + } + + WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP)); + + mem->map_shift = ilog2(RXE_BUF_PER_MAP); + mem->map_mask = RXE_BUF_PER_MAP - 1; + + mem->num_buf = num_buf; + mem->num_map = num_map; + mem->max_buf = num_map * RXE_BUF_PER_MAP; + + return 0; + +err2: + for (i--; i >= 0; i--) + kfree(mem->map[i]); + + kfree(mem->map); +err1: + return -ENOMEM; +} + +int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, + int access, struct rxe_mem *mem) +{ + rxe_mem_init(access, mem); + + mem->pd = pd; + mem->access = access; + mem->state = RXE_MEM_STATE_VALID; + mem->type = RXE_MEM_TYPE_DMA; + + return 0; +} + +int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, + u64 length, u64 iova, int access, struct ib_udata *udata, + struct rxe_mem *mem) +{ + int entry; + struct rxe_map **map; + struct rxe_phys_buf *buf = NULL; + struct ib_umem *umem; + struct scatterlist *sg; + int num_buf; + void *vaddr; + int err; + + umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); + if (IS_ERR(umem)) { + pr_warn("err %d from rxe_umem_get\n", + (int)PTR_ERR(umem)); + err = -EINVAL; + goto err1; + } + + mem->umem = umem; + num_buf = umem->nmap; + + rxe_mem_init(access, mem); + + err = rxe_mem_alloc(rxe, mem, num_buf); + if (err) { + pr_warn("err %d from rxe_mem_alloc\n", err); + ib_umem_release(umem); + goto err1; + } + + WARN_ON(!is_power_of_2(umem->page_size)); + + mem->page_shift = ilog2(umem->page_size); + mem->page_mask = umem->page_size - 1; + + num_buf = 0; + map = mem->map; + if (length > 0) { + buf = map[0]->buf; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + pr_warn("null vaddr\n"); + err = -ENOMEM; + goto err1; + } + + buf->addr = (uintptr_t)vaddr; + buf->size = umem->page_size; + num_buf++; + buf++; + + if (num_buf >= RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + } + } + + mem->pd = pd; + mem->umem = umem; + mem->access = access; + mem->length = length; + mem->iova = iova; + mem->va = start; + mem->offset = ib_umem_offset(umem); + mem->state = RXE_MEM_STATE_VALID; + mem->type = RXE_MEM_TYPE_MR; + + return 0; + +err1: + return err; +} + +int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, + int max_pages, struct rxe_mem *mem) +{ + int err; + + rxe_mem_init(0, mem); + + /* In fastreg, we also set the rkey */ + mem->ibmr.rkey = mem->ibmr.lkey; + + err = rxe_mem_alloc(rxe, mem, max_pages); + if (err) + goto err1; + + mem->pd = pd; + mem->max_buf = max_pages; + mem->state = RXE_MEM_STATE_FREE; + mem->type = RXE_MEM_TYPE_MR; + + return 0; + +err1: + return err; +} + +static void lookup_iova( + struct rxe_mem *mem, + u64 iova, + int *m_out, + int *n_out, + size_t *offset_out) +{ + size_t offset = iova - mem->iova + mem->offset; + int map_index; + int buf_index; + u64 length; + + if (likely(mem->page_shift)) { + *offset_out = offset & mem->page_mask; + offset >>= mem->page_shift; + *n_out = offset & mem->map_mask; + *m_out = offset >> mem->map_shift; + } else { + map_index = 0; + buf_index = 0; + + length = mem->map[map_index]->buf[buf_index].size; + + while (offset >= length) { + offset -= length; + buf_index++; + + if (buf_index == RXE_BUF_PER_MAP) { + map_index++; + buf_index = 0; + } + length = mem->map[map_index]->buf[buf_index].size; + } + + *m_out = map_index; + *n_out = buf_index; + *offset_out = offset; + } +} + +void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) +{ + size_t offset; + int m, n; + void *addr; + + if (mem->state != RXE_MEM_STATE_VALID) { + pr_warn("mem not in valid state\n"); + addr = NULL; + goto out; + } + + if (!mem->map) { + addr = (void *)(uintptr_t)iova; + goto out; + } + + if (mem_check_range(mem, iova, length)) { + pr_warn("range violation\n"); + addr = NULL; + goto out; + } + + lookup_iova(mem, iova, &m, &n, &offset); + + if (offset + length > mem->map[m]->buf[n].size) { + pr_warn("crosses page boundary\n"); + addr = NULL; + goto out; + } + + addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; + +out: + return addr; +} + +/* copy data from a range (vaddr, vaddr+length-1) to or from + * a mem object starting at iova. Compute incremental value of + * crc32 if crcp is not zero. caller must hold a reference to mem + */ +int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, + enum copy_direction dir, u32 *crcp) +{ + int err; + int bytes; + u8 *va; + struct rxe_map **map; + struct rxe_phys_buf *buf; + int m; + int i; + size_t offset; + u32 crc = crcp ? (*crcp) : 0; + + if (mem->type == RXE_MEM_TYPE_DMA) { + u8 *src, *dest; + + src = (dir == to_mem_obj) ? + addr : ((void *)(uintptr_t)iova); + + dest = (dir == to_mem_obj) ? + ((void *)(uintptr_t)iova) : addr; + + if (crcp) + *crcp = crc32_le(*crcp, src, length); + + memcpy(dest, src, length); + + return 0; + } + + WARN_ON(!mem->map); + + err = mem_check_range(mem, iova, length); + if (err) { + err = -EFAULT; + goto err1; + } + + lookup_iova(mem, iova, &m, &i, &offset); + + map = mem->map + m; + buf = map[0]->buf + i; + + while (length > 0) { + u8 *src, *dest; + + va = (u8 *)(uintptr_t)buf->addr + offset; + src = (dir == to_mem_obj) ? addr : va; + dest = (dir == to_mem_obj) ? va : addr; + + bytes = buf->size - offset; + + if (bytes > length) + bytes = length; + + if (crcp) + crc = crc32_le(crc, src, bytes); + + memcpy(dest, src, bytes); + + length -= bytes; + addr += bytes; + + offset = 0; + buf++; + i++; + + if (i == RXE_BUF_PER_MAP) { + i = 0; + map++; + buf = map[0]->buf; + } + } + + if (crcp) + *crcp = crc; + + return 0; + +err1: + return err; +} + +/* copy data in or out of a wqe, i.e. sg list + * under the control of a dma descriptor + */ +int copy_data( + struct rxe_dev *rxe, + struct rxe_pd *pd, + int access, + struct rxe_dma_info *dma, + void *addr, + int length, + enum copy_direction dir, + u32 *crcp) +{ + int bytes; + struct rxe_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + struct rxe_mem *mem = NULL; + u64 iova; + int err; + + if (length == 0) + return 0; + + if (length > resid) { + err = -EINVAL; + goto err2; + } + + if (sge->length && (offset < sge->length)) { + mem = lookup_mem(pd, access, sge->lkey, lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } + + while (length > 0) { + bytes = length; + + if (offset >= sge->length) { + if (mem) { + rxe_drop_ref(mem); + mem = NULL; + } + sge++; + dma->cur_sge++; + offset = 0; + + if (dma->cur_sge >= dma->num_sge) { + err = -ENOSPC; + goto err2; + } + + if (sge->length) { + mem = lookup_mem(pd, access, sge->lkey, + lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } else { + continue; + } + } + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + if (bytes > 0) { + iova = sge->addr + offset; + + err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); + if (err) + goto err2; + + offset += bytes; + resid -= bytes; + length -= bytes; + addr += bytes; + } + } + + dma->sge_offset = offset; + dma->resid = resid; + + if (mem) + rxe_drop_ref(mem); + + return 0; + +err2: + if (mem) + rxe_drop_ref(mem); +err1: + return err; +} + +int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) +{ + struct rxe_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + + while (length) { + unsigned int bytes; + + if (offset >= sge->length) { + sge++; + dma->cur_sge++; + offset = 0; + if (dma->cur_sge >= dma->num_sge) + return -ENOSPC; + } + + bytes = length; + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + offset += bytes; + resid -= bytes; + length -= bytes; + } + + dma->sge_offset = offset; + dma->resid = resid; + + return 0; +} + +/* (1) find the mem (mr or mw) corresponding to lkey/rkey + * depending on lookup_type + * (2) verify that the (qp) pd matches the mem pd + * (3) verify that the mem can support the requested access + * (4) verify that mem state is valid + */ +struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, + enum lookup_type type) +{ + struct rxe_mem *mem; + struct rxe_dev *rxe = to_rdev(pd->ibpd.device); + int index = key >> 8; + + if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { + mem = rxe_pool_get_index(&rxe->mr_pool, index); + if (!mem) + goto err1; + } else { + goto err1; + } + + if ((type == lookup_local && mem->lkey != key) || + (type == lookup_remote && mem->rkey != key)) + goto err2; + + if (mem->pd != pd) + goto err2; + + if (access && !(access & mem->access)) + goto err2; + + if (mem->state != RXE_MEM_STATE_VALID) + goto err2; + + return mem; + +err2: + rxe_drop_ref(mem); +err1: + return NULL; +} + +int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, + u64 *page, int num_pages, u64 iova) +{ + int i; + int num_buf; + int err; + struct rxe_map **map; + struct rxe_phys_buf *buf; + int page_size; + + if (num_pages > mem->max_buf) { + err = -EINVAL; + goto err1; + } + + num_buf = 0; + page_size = 1 << mem->page_shift; + map = mem->map; + buf = map[0]->buf; + + for (i = 0; i < num_pages; i++) { + buf->addr = *page++; + buf->size = page_size; + buf++; + num_buf++; + + if (num_buf == RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + } + + mem->iova = iova; + mem->va = iova; + mem->length = num_pages << mem->page_shift; + mem->state = RXE_MEM_STATE_VALID; + + return 0; + +err1: + return err; +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c new file mode 100644 index 000000000000..0b8d2ea8b41d --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <linux/if.h> +#include <linux/if_vlan.h> +#include <net/udp_tunnel.h> +#include <net/sch_generic.h> +#include <linux/netfilter.h> +#include <rdma/ib_addr.h> + +#include "rxe.h" +#include "rxe_net.h" +#include "rxe_loc.h" + +static LIST_HEAD(rxe_dev_list); +static spinlock_t dev_list_lock; /* spinlock for device list */ + +struct rxe_dev *net_to_rxe(struct net_device *ndev) +{ + struct rxe_dev *rxe; + struct rxe_dev *found = NULL; + + spin_lock_bh(&dev_list_lock); + list_for_each_entry(rxe, &rxe_dev_list, list) { + if (rxe->ndev == ndev) { + found = rxe; + break; + } + } + spin_unlock_bh(&dev_list_lock); + + return found; +} + +struct rxe_dev *get_rxe_by_name(const char* name) +{ + struct rxe_dev *rxe; + struct rxe_dev *found = NULL; + + spin_lock_bh(&dev_list_lock); + list_for_each_entry(rxe, &rxe_dev_list, list) { + if (!strcmp(name, rxe->ib_dev.name)) { + found = rxe; + break; + } + } + spin_unlock_bh(&dev_list_lock); + return found; +} + + +struct rxe_recv_sockets recv_sockets; + +static __be64 rxe_mac_to_eui64(struct net_device *ndev) +{ + unsigned char *mac_addr = ndev->dev_addr; + __be64 eui64; + unsigned char *dst = (unsigned char *)&eui64; + + dst[0] = mac_addr[0] ^ 2; + dst[1] = mac_addr[1]; + dst[2] = mac_addr[2]; + dst[3] = 0xff; + dst[4] = 0xfe; + dst[5] = mac_addr[3]; + dst[6] = mac_addr[4]; + dst[7] = mac_addr[5]; + + return eui64; +} + +static __be64 node_guid(struct rxe_dev *rxe) +{ + return rxe_mac_to_eui64(rxe->ndev); +} + +static __be64 port_guid(struct rxe_dev *rxe) +{ + return rxe_mac_to_eui64(rxe->ndev); +} + +static struct device *dma_device(struct rxe_dev *rxe) +{ + struct net_device *ndev; + + ndev = rxe->ndev; + + if (ndev->priv_flags & IFF_802_1Q_VLAN) + ndev = vlan_dev_real_dev(ndev); + + return ndev->dev.parent; +} + +static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_add(rxe->ndev, ll_addr); + + return err; +} + +static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_del(rxe->ndev, ll_addr); + + return err; +} + +static struct dst_entry *rxe_find_route4(struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) +{ + struct rtable *rt; + struct flowi4 fl = { { 0 } }; + + memset(&fl, 0, sizeof(fl)); + fl.flowi4_oif = ndev->ifindex; + memcpy(&fl.saddr, saddr, sizeof(*saddr)); + memcpy(&fl.daddr, daddr, sizeof(*daddr)); + fl.flowi4_proto = IPPROTO_UDP; + + rt = ip_route_output_key(&init_net, &fl); + if (IS_ERR(rt)) { + pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + return NULL; + } + + return &rt->dst; +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *rxe_find_route6(struct net_device *ndev, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + struct dst_entry *ndst; + struct flowi6 fl6 = { { 0 } }; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = ndev->ifindex; + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); + memcpy(&fl6.daddr, daddr, sizeof(*daddr)); + fl6.flowi6_proto = IPPROTO_UDP; + + if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk), + recv_sockets.sk6->sk, &ndst, &fl6))) { + pr_err_ratelimited("no route to %pI6\n", daddr); + goto put; + } + + if (unlikely(ndst->error)) { + pr_err("no route to %pI6\n", daddr); + goto put; + } + + return ndst; +put: + dst_release(ndst); + return NULL; +} + +#else + +static struct dst_entry *rxe_find_route6(struct net_device *ndev, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + return NULL; +} + +#endif + +static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udphdr *udph; + struct net_device *ndev = skb->dev; + struct rxe_dev *rxe = net_to_rxe(ndev); + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + if (!rxe) + goto drop; + + if (skb_linearize(skb)) { + pr_err("skb_linearize failed\n"); + goto drop; + } + + udph = udp_hdr(skb); + pkt->rxe = rxe; + pkt->port_num = 1; + pkt->hdr = (u8 *)(udph + 1); + pkt->mask = RXE_GRH_MASK; + pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); + + return rxe_rcv(skb); +drop: + kfree_skb(skb); + return 0; +} + +static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, + bool ipv6) +{ + int err; + struct socket *sock; + struct udp_port_cfg udp_cfg; + struct udp_tunnel_sock_cfg tnl_cfg; + + memset(&udp_cfg, 0, sizeof(udp_cfg)); + + if (ipv6) { + udp_cfg.family = AF_INET6; + udp_cfg.ipv6_v6only = 1; + } else { + udp_cfg.family = AF_INET; + } + + udp_cfg.local_udp_port = port; + + /* Create UDP socket */ + err = udp_sock_create(net, &udp_cfg, &sock); + if (err < 0) { + pr_err("failed to create udp socket. err = %d\n", err); + return ERR_PTR(err); + } + + tnl_cfg.sk_user_data = NULL; + tnl_cfg.encap_type = 1; + tnl_cfg.encap_rcv = rxe_udp_encap_recv; + tnl_cfg.encap_destroy = NULL; + + /* Setup UDP tunnel */ + setup_udp_tunnel_sock(net, sock, &tnl_cfg); + + return sock; +} + +static void rxe_release_udp_tunnel(struct socket *sk) +{ + udp_tunnel_sock_release(sk); +} + +static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, + __be16 dst_port) +{ + struct udphdr *udph; + + __skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + + udph->dest = dst_port; + udph->source = src_port; + udph->len = htons(skb->len); + udph->check = 0; +} + +static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, + __be32 saddr, __be32 daddr, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) +{ + struct iphdr *iph; + + skb_scrub_packet(skb, xnet); + + skb_clear_hash(skb); + skb_dst_set(skb, dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = IPVERSION; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = daddr; + iph->saddr = saddr; + iph->ttl = ttl; + __ip_select_ident(dev_net(dst->dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); + iph->tot_len = htons(skb->len); + ip_send_check(iph); +} + +static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 proto, __u8 prio, __u8 ttl) +{ + struct ipv6hdr *ip6h; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + skb_dst_set(skb, dst); + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = proto; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); +} + +static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +{ + struct dst_entry *dst; + bool xnet = false; + __be16 df = htons(IP_DF); + struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + dst = rxe_find_route4(rxe->ndev, saddr, daddr); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + if (!memcmp(saddr, daddr, sizeof(*daddr))) + pkt->mask |= RXE_LOOPBACK_MASK; + + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), + htons(ROCE_V2_UDP_DPORT)); + + prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, + av->grh.traffic_class, av->grh.hop_limit, df, xnet); + return 0; +} + +static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +{ + struct dst_entry *dst; + struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + dst = rxe_find_route6(rxe->ndev, saddr, daddr); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + if (!memcmp(saddr, daddr, sizeof(*daddr))) + pkt->mask |= RXE_LOOPBACK_MASK; + + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), + htons(ROCE_V2_UDP_DPORT)); + + prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, + av->grh.traffic_class, + av->grh.hop_limit); + return 0; +} + +static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc) +{ + int err = 0; + struct rxe_av *av = rxe_get_av(pkt); + + if (av->network_type == RDMA_NETWORK_IPV4) + err = prepare4(rxe, skb, av); + else if (av->network_type == RDMA_NETWORK_IPV6) + err = prepare6(rxe, skb, av); + + *crc = rxe_icrc_hdr(pkt, skb); + + return err; +} + +static void rxe_skb_tx_dtor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct rxe_qp *qp = sk->sk_user_data; + int skb_out = atomic_dec_return(&qp->skb_out); + + if (unlikely(qp->need_req_skb && + skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) + rxe_run_task(&qp->req.task, 1); +} + +static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb) +{ + struct sk_buff *nskb; + struct rxe_av *av; + int err; + + av = rxe_get_av(pkt); + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + nskb->destructor = rxe_skb_tx_dtor; + nskb->sk = pkt->qp->sk->sk; + + if (av->network_type == RDMA_NETWORK_IPV4) { + err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); + } else if (av->network_type == RDMA_NETWORK_IPV6) { + err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); + } else { + pr_err("Unknown layer 3 protocol: %d\n", av->network_type); + kfree_skb(nskb); + return -EINVAL; + } + + if (unlikely(net_xmit_eval(err))) { + pr_debug("error sending packet: %d\n", err); + return -EAGAIN; + } + + kfree_skb(skb); + + return 0; +} + +static int loopback(struct sk_buff *skb) +{ + return rxe_rcv(skb); +} + +static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av) +{ + return rxe->port.port_guid == av->grh.dgid.global.interface_id; +} + +static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt) +{ + unsigned int hdr_len; + struct sk_buff *skb; + + if (av->network_type == RDMA_NETWORK_IPV4) + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct iphdr); + else + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct ipv6hdr); + + skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev), + GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); + + skb->dev = rxe->ndev; + if (av->network_type == RDMA_NETWORK_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + pkt->rxe = rxe; + pkt->port_num = 1; + pkt->hdr = skb_put(skb, paylen); + pkt->mask |= RXE_GRH_MASK; + + memset(pkt->hdr, 0, paylen); + + return skb; +} + +/* + * this is required by rxe_cfg to match rxe devices in + * /sys/class/infiniband up with their underlying ethernet devices + */ +static char *parent_name(struct rxe_dev *rxe, unsigned int port_num) +{ + return rxe->ndev->name; +} + +static enum rdma_link_layer link_layer(struct rxe_dev *rxe, + unsigned int port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static struct rxe_ifc_ops ifc_ops = { + .node_guid = node_guid, + .port_guid = port_guid, + .dma_device = dma_device, + .mcast_add = mcast_add, + .mcast_delete = mcast_delete, + .prepare = prepare, + .send = send, + .loopback = loopback, + .init_packet = init_packet, + .parent_name = parent_name, + .link_layer = link_layer, +}; + +struct rxe_dev *rxe_net_add(struct net_device *ndev) +{ + int err; + struct rxe_dev *rxe = NULL; + + rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe)); + if (!rxe) + return NULL; + + rxe->ifc_ops = &ifc_ops; + rxe->ndev = ndev; + + err = rxe_add(rxe, ndev->mtu); + if (err) { + ib_dealloc_device(&rxe->ib_dev); + return NULL; + } + + spin_lock_bh(&dev_list_lock); + list_add_tail(&rxe_dev_list, &rxe->list); + spin_unlock_bh(&dev_list_lock); + return rxe; +} + +void rxe_remove_all(void) +{ + spin_lock_bh(&dev_list_lock); + while (!list_empty(&rxe_dev_list)) { + struct rxe_dev *rxe = + list_first_entry(&rxe_dev_list, struct rxe_dev, list); + + list_del(&rxe->list); + spin_unlock_bh(&dev_list_lock); + rxe_remove(rxe); + spin_lock_bh(&dev_list_lock); + } + spin_unlock_bh(&dev_list_lock); +} +EXPORT_SYMBOL(rxe_remove_all); + +static void rxe_port_event(struct rxe_dev *rxe, + enum ib_event_type event) +{ + struct ib_event ev; + + ev.device = &rxe->ib_dev; + ev.element.port_num = 1; + ev.event = event; + + ib_dispatch_event(&ev); +} + +/* Caller must hold net_info_lock */ +void rxe_port_up(struct rxe_dev *rxe) +{ + struct rxe_port *port; + + port = &rxe->port; + port->attr.state = IB_PORT_ACTIVE; + port->attr.phys_state = IB_PHYS_STATE_LINK_UP; + + rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); + pr_info("rxe: set %s active\n", rxe->ib_dev.name); + return; +} + +/* Caller must hold net_info_lock */ +void rxe_port_down(struct rxe_dev *rxe) +{ + struct rxe_port *port; + + port = &rxe->port; + port->attr.state = IB_PORT_DOWN; + port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; + + rxe_port_event(rxe, IB_EVENT_PORT_ERR); + pr_info("rxe: set %s down\n", rxe->ib_dev.name); + return; +} + +static int rxe_notify(struct notifier_block *not_blk, + unsigned long event, + void *arg) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(arg); + struct rxe_dev *rxe = net_to_rxe(ndev); + + if (!rxe) + goto out; + + switch (event) { + case NETDEV_UNREGISTER: + list_del(&rxe->list); + rxe_remove(rxe); + break; + case NETDEV_UP: + rxe_port_up(rxe); + break; + case NETDEV_DOWN: + rxe_port_down(rxe); + break; + case NETDEV_CHANGEMTU: + pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_set_mtu(rxe, ndev->mtu); + break; + case NETDEV_REBOOT: + case NETDEV_CHANGE: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + default: + pr_info("rxe: ignoring netdev event = %ld for %s\n", + event, ndev->name); + break; + } +out: + return NOTIFY_OK; +} + +static struct notifier_block rxe_net_notifier = { + .notifier_call = rxe_notify, +}; + +int rxe_net_init(void) +{ + int err; + + spin_lock_init(&dev_list_lock); + + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + return -1; + } + + recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), false); + if (IS_ERR(recv_sockets.sk4)) { + rxe_release_udp_tunnel(recv_sockets.sk6); + recv_sockets.sk4 = NULL; + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + return -1; + } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); + pr_err("rxe: Failed to rigister netdev notifier\n"); + } + + return err; +} + +void rxe_net_exit(void) +{ + if (recv_sockets.sk6) + rxe_release_udp_tunnel(recv_sockets.sk6); + + if (recv_sockets.sk4) + rxe_release_udp_tunnel(recv_sockets.sk4); + + unregister_netdevice_notifier(&rxe_net_notifier); +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h new file mode 100644 index 000000000000..7b06f76d16cc --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_NET_H +#define RXE_NET_H + +#include <net/sock.h> +#include <net/if_inet6.h> +#include <linux/module.h> + +struct rxe_recv_sockets { + struct socket *sk4; + struct socket *sk6; +}; + +extern struct rxe_recv_sockets recv_sockets; + +struct rxe_dev *rxe_net_add(struct net_device *ndev); + +int rxe_net_init(void); +void rxe_net_exit(void); + +#endif /* RXE_NET_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c new file mode 100644 index 000000000000..61927c165b59 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -0,0 +1,961 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_pack.h> +#include "rxe_opcode.h" +#include "rxe_hdr.h" + +/* useful information about work request opcodes and pkt opcodes in + * table form + */ +struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { + [IB_WR_RDMA_WRITE] = { + .name = "IB_WR_RDMA_WRITE", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_RDMA_WRITE_WITH_IMM] = { + .name = "IB_WR_RDMA_WRITE_WITH_IMM", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_SEND] = { + .name = "IB_WR_SEND", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_SEND_WITH_IMM] = { + .name = "IB_WR_SEND_WITH_IMM", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ] = { + .name = "IB_WR_RDMA_READ", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_ATOMIC_CMP_AND_SWP] = { + .name = "IB_WR_ATOMIC_CMP_AND_SWP", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_ATOMIC_FETCH_AND_ADD] = { + .name = "IB_WR_ATOMIC_FETCH_AND_ADD", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_LSO] = { + .name = "IB_WR_LSO", + .mask = { + /* not supported */ + }, + }, + [IB_WR_SEND_WITH_INV] = { + .name = "IB_WR_SEND_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ_WITH_INV] = { + .name = "IB_WR_RDMA_READ_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_LOCAL_INV] = { + .name = "IB_WR_LOCAL_INV", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, + [IB_WR_REG_MR] = { + .name = "IB_WR_REG_MR", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, +}; + +struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { + [IB_OPCODE_RC_SEND_FIRST] = { + .name = "IB_OPCODE_RC_SEND_FIRST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_MIDDLE] = { + .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST] = { + .name = "IB_OPCODE_RC_SEND_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY] = { + .name = "IB_OPCODE_RC_SEND_ONLY", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", + .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ACKNOWLEDGE", + .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", + .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_COMPARE_SWAP] = { + .name = "IB_OPCODE_RC_COMPARE_SWAP", + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_ATMETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_FETCH_ADD] = { + .name = "IB_OPCODE_RC_FETCH_ADD", + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_ATMETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_INV", + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, + } + }, + + /* UC */ + [IB_OPCODE_UC_SEND_FIRST] = { + .name = "IB_OPCODE_UC_SEND_FIRST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_MIDDLE] = { + .name = "IB_OPCODE_UC_SEND_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST] = { + .name = "IB_OPCODE_UC_SEND_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY] = { + .name = "IB_OPCODE_UC_SEND_ONLY", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + + /* RD */ + [IB_OPCODE_RD_SEND_FIRST] = { + .name = "IB_OPCODE_RD_SEND_FIRST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_MIDDLE] = { + .name = "IB_OPCODE_RD_SEND_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST] = { + .name = "IB_OPCODE_RD_SEND_LAST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY] = { + .name = "IB_OPCODE_RD_SEND_ONLY", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES + + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_REQ_MASK | RXE_READ_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK + | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK + | RXE_ACK_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ACKNOWLEDGE", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_COMPARE_SWAP] = { + .name = "RD_COMPARE_SWAP", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK + | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_FETCH_ADD] = { + .name = "IB_OPCODE_RD_FETCH_ADD", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK + | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + + RXE_RDETH_BYTES, + } + }, + + /* UD */ + [IB_OPCODE_UD_SEND_ONLY] = { + .name = "IB_OPCODE_UD_SEND_ONLY", + .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_DETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_DETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + +}; diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h new file mode 100644 index 000000000000..307604e9c78d --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_OPCODE_H +#define RXE_OPCODE_H + +/* + * contains header bit mask definitions and header lengths + * declaration of the rxe_opcode_info struct and + * rxe_wr_opcode_info struct + */ + +enum rxe_wr_mask { + WR_INLINE_MASK = BIT(0), + WR_ATOMIC_MASK = BIT(1), + WR_SEND_MASK = BIT(2), + WR_READ_MASK = BIT(3), + WR_WRITE_MASK = BIT(4), + WR_LOCAL_MASK = BIT(5), + WR_REG_MASK = BIT(6), + + WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, + WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, + WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, + WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, +}; + +#define WR_MAX_QPT (8) + +struct rxe_wr_opcode_info { + char *name; + enum rxe_wr_mask mask[WR_MAX_QPT]; +}; + +extern struct rxe_wr_opcode_info rxe_wr_opcode_info[]; + +enum rxe_hdr_type { + RXE_LRH, + RXE_GRH, + RXE_BTH, + RXE_RETH, + RXE_AETH, + RXE_ATMETH, + RXE_ATMACK, + RXE_IETH, + RXE_RDETH, + RXE_DETH, + RXE_IMMDT, + RXE_PAYLOAD, + NUM_HDR_TYPES +}; + +enum rxe_hdr_mask { + RXE_LRH_MASK = BIT(RXE_LRH), + RXE_GRH_MASK = BIT(RXE_GRH), + RXE_BTH_MASK = BIT(RXE_BTH), + RXE_IMMDT_MASK = BIT(RXE_IMMDT), + RXE_RETH_MASK = BIT(RXE_RETH), + RXE_AETH_MASK = BIT(RXE_AETH), + RXE_ATMETH_MASK = BIT(RXE_ATMETH), + RXE_ATMACK_MASK = BIT(RXE_ATMACK), + RXE_IETH_MASK = BIT(RXE_IETH), + RXE_RDETH_MASK = BIT(RXE_RDETH), + RXE_DETH_MASK = BIT(RXE_DETH), + RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), + + RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), + RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1), + RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2), + RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), + RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), + RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), + RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + + RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), + RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), + + RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + + RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), + RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), +}; + +#define OPCODE_NONE (-1) +#define RXE_NUM_OPCODE 256 + +struct rxe_opcode_info { + char *name; + enum rxe_hdr_mask mask; + int length; + int offset[NUM_HDR_TYPES]; +}; + +extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE]; + +#endif /* RXE_OPCODE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h new file mode 100644 index 000000000000..f459c43a77c8 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_PARAM_H +#define RXE_PARAM_H + +static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) +{ + if (mtu < 256) + return 0; + else if (mtu < 512) + return IB_MTU_256; + else if (mtu < 1024) + return IB_MTU_512; + else if (mtu < 2048) + return IB_MTU_1024; + else if (mtu < 4096) + return IB_MTU_2048; + else + return IB_MTU_4096; +} + +/* Find the IB mtu for a given network MTU. */ +static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) +{ + mtu -= RXE_MAX_HDR_LENGTH; + + return rxe_mtu_int_to_enum(mtu); +} + +/* default/initial rxe device parameter settings */ +enum rxe_device_param { + RXE_FW_VER = 0, + RXE_MAX_MR_SIZE = -1ull, + RXE_PAGE_SIZE_CAP = 0xfffff000, + RXE_VENDOR_ID = 0, + RXE_VENDOR_PART_ID = 0, + RXE_HW_VER = 0, + RXE_MAX_QP = 0x10000, + RXE_MAX_QP_WR = 0x4000, + RXE_MAX_INLINE_DATA = 400, + RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR + | IB_DEVICE_BAD_QKEY_CNTR + | IB_DEVICE_AUTO_PATH_MIG + | IB_DEVICE_CHANGE_PHY_PORT + | IB_DEVICE_UD_AV_PORT_ENFORCE + | IB_DEVICE_PORT_ACTIVE_EVENT + | IB_DEVICE_SYS_IMAGE_GUID + | IB_DEVICE_RC_RNR_NAK_GEN + | IB_DEVICE_SRQ_RESIZE + | IB_DEVICE_MEM_MGT_EXTENSIONS, + RXE_MAX_SGE = 32, + RXE_MAX_SGE_RD = 32, + RXE_MAX_CQ = 16384, + RXE_MAX_LOG_CQE = 13, + RXE_MAX_MR = 2 * 1024, + RXE_MAX_PD = 0x7ffc, + RXE_MAX_QP_RD_ATOM = 128, + RXE_MAX_EE_RD_ATOM = 0, + RXE_MAX_RES_RD_ATOM = 0x3f000, + RXE_MAX_QP_INIT_RD_ATOM = 128, + RXE_MAX_EE_INIT_RD_ATOM = 0, + RXE_ATOMIC_CAP = 1, + RXE_MAX_EE = 0, + RXE_MAX_RDD = 0, + RXE_MAX_MW = 0, + RXE_MAX_RAW_IPV6_QP = 0, + RXE_MAX_RAW_ETHY_QP = 0, + RXE_MAX_MCAST_GRP = 8192, + RXE_MAX_MCAST_QP_ATTACH = 56, + RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, + RXE_MAX_AH = 100, + RXE_MAX_FMR = 0, + RXE_MAX_MAP_PER_FMR = 0, + RXE_MAX_SRQ = 960, + RXE_MAX_SRQ_WR = 0x4000, + RXE_MIN_SRQ_WR = 1, + RXE_MAX_SRQ_SGE = 27, + RXE_MIN_SRQ_SGE = 1, + RXE_MAX_FMR_PAGE_LIST_LEN = 512, + RXE_MAX_PKEYS = 64, + RXE_LOCAL_CA_ACK_DELAY = 15, + + RXE_MAX_UCONTEXT = 512, + + RXE_NUM_PORT = 1, + RXE_NUM_COMP_VECTORS = 1, + + RXE_MIN_QP_INDEX = 16, + RXE_MAX_QP_INDEX = 0x00020000, + + RXE_MIN_SRQ_INDEX = 0x00020001, + RXE_MAX_SRQ_INDEX = 0x00040000, + + RXE_MIN_MR_INDEX = 0x00000001, + RXE_MAX_MR_INDEX = 0x00040000, + RXE_MIN_MW_INDEX = 0x00040001, + RXE_MAX_MW_INDEX = 0x00060000, + RXE_MAX_PKT_PER_ACK = 64, + + RXE_MAX_UNACKED_PSNS = 128, + + /* Max inflight SKBs per queue pair */ + RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64, + RXE_INFLIGHT_SKBS_PER_QP_LOW = 16, + + /* Delay before calling arbiter timer */ + RXE_NSEC_ARB_TIMER_DELAY = 200, +}; + +/* default/initial rxe port parameters */ +enum rxe_port_param { + RXE_PORT_STATE = IB_PORT_DOWN, + RXE_PORT_MAX_MTU = IB_MTU_4096, + RXE_PORT_ACTIVE_MTU = IB_MTU_256, + RXE_PORT_GID_TBL_LEN = 1024, + RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_MAX_MSG_SZ = 0x800000, + RXE_PORT_BAD_PKEY_CNTR = 0, + RXE_PORT_QKEY_VIOL_CNTR = 0, + RXE_PORT_LID = 0, + RXE_PORT_SM_LID = 0, + RXE_PORT_SM_SL = 0, + RXE_PORT_LMC = 0, + RXE_PORT_MAX_VL_NUM = 1, + RXE_PORT_SUBNET_TIMEOUT = 0, + RXE_PORT_INIT_TYPE_REPLY = 0, + RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, + RXE_PORT_ACTIVE_SPEED = 1, + RXE_PORT_PKEY_TBL_LEN = 64, + RXE_PORT_PHYS_STATE = 2, + RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, +}; + +/* default/initial port info parameters */ +enum rxe_port_info_param { + RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */ + RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */ + RXE_PORT_INFO_OPER_VL = 1, /* 1 */ +}; + +#endif /* RXE_PARAM_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c new file mode 100644 index 000000000000..6bac0717c540 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* info about object pools + * note that mr and mw share a single index space + * so that one can map an lkey to the correct type of object + */ +struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { + [RXE_TYPE_UC] = { + .name = "rxe-uc", + .size = sizeof(struct rxe_ucontext), + }, + [RXE_TYPE_PD] = { + .name = "rxe-pd", + .size = sizeof(struct rxe_pd), + }, + [RXE_TYPE_AH] = { + .name = "rxe-ah", + .size = sizeof(struct rxe_ah), + .flags = RXE_POOL_ATOMIC, + }, + [RXE_TYPE_SRQ] = { + .name = "rxe-srq", + .size = sizeof(struct rxe_srq), + .flags = RXE_POOL_INDEX, + .min_index = RXE_MIN_SRQ_INDEX, + .max_index = RXE_MAX_SRQ_INDEX, + }, + [RXE_TYPE_QP] = { + .name = "rxe-qp", + .size = sizeof(struct rxe_qp), + .cleanup = rxe_qp_cleanup, + .flags = RXE_POOL_INDEX, + .min_index = RXE_MIN_QP_INDEX, + .max_index = RXE_MAX_QP_INDEX, + }, + [RXE_TYPE_CQ] = { + .name = "rxe-cq", + .size = sizeof(struct rxe_cq), + .cleanup = rxe_cq_cleanup, + }, + [RXE_TYPE_MR] = { + .name = "rxe-mr", + .size = sizeof(struct rxe_mem), + .cleanup = rxe_mem_cleanup, + .flags = RXE_POOL_INDEX, + .max_index = RXE_MAX_MR_INDEX, + .min_index = RXE_MIN_MR_INDEX, + }, + [RXE_TYPE_MW] = { + .name = "rxe-mw", + .size = sizeof(struct rxe_mem), + .flags = RXE_POOL_INDEX, + .max_index = RXE_MAX_MW_INDEX, + .min_index = RXE_MIN_MW_INDEX, + }, + [RXE_TYPE_MC_GRP] = { + .name = "rxe-mc_grp", + .size = sizeof(struct rxe_mc_grp), + .cleanup = rxe_mc_cleanup, + .flags = RXE_POOL_KEY, + .key_offset = offsetof(struct rxe_mc_grp, mgid), + .key_size = sizeof(union ib_gid), + }, + [RXE_TYPE_MC_ELEM] = { + .name = "rxe-mc_elem", + .size = sizeof(struct rxe_mc_elem), + .flags = RXE_POOL_ATOMIC, + }, +}; + +static inline char *pool_name(struct rxe_pool *pool) +{ + return rxe_type_info[pool->type].name; +} + +static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) +{ + return rxe_type_info[pool->type].cache; +} + +static inline enum rxe_elem_type rxe_type(void *arg) +{ + struct rxe_pool_entry *elem = arg; + + return elem->pool->type; +} + +int rxe_cache_init(void) +{ + int err; + int i; + size_t size; + struct rxe_type_info *type; + + for (i = 0; i < RXE_NUM_TYPES; i++) { + type = &rxe_type_info[i]; + size = ALIGN(type->size, RXE_POOL_ALIGN); + type->cache = kmem_cache_create(type->name, size, + RXE_POOL_ALIGN, + RXE_POOL_CACHE_FLAGS, NULL); + if (!type->cache) { + pr_err("Unable to init kmem cache for %s\n", + type->name); + err = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + while (--i >= 0) { + kmem_cache_destroy(type->cache); + type->cache = NULL; + } + + return err; +} + +void rxe_cache_exit(void) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < RXE_NUM_TYPES; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + +static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) +{ + int err = 0; + size_t size; + + if ((max - min + 1) < pool->max_elem) { + pr_warn("not enough indices for max_elem\n"); + err = -EINVAL; + goto out; + } + + pool->max_index = max; + pool->min_index = min; + + size = BITS_TO_LONGS(max - min + 1) * sizeof(long); + pool->table = kmalloc(size, GFP_KERNEL); + if (!pool->table) { + pr_warn("no memory for bit table\n"); + err = -ENOMEM; + goto out; + } + + pool->table_size = size; + bitmap_zero(pool->table, max - min + 1); + +out: + return err; +} + +int rxe_pool_init( + struct rxe_dev *rxe, + struct rxe_pool *pool, + enum rxe_elem_type type, + unsigned max_elem) +{ + int err = 0; + size_t size = rxe_type_info[type].size; + + memset(pool, 0, sizeof(*pool)); + + pool->rxe = rxe; + pool->type = type; + pool->max_elem = max_elem; + pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); + pool->flags = rxe_type_info[type].flags; + pool->tree = RB_ROOT; + pool->cleanup = rxe_type_info[type].cleanup; + + atomic_set(&pool->num_elem, 0); + + kref_init(&pool->ref_cnt); + + spin_lock_init(&pool->pool_lock); + + if (rxe_type_info[type].flags & RXE_POOL_INDEX) { + err = rxe_pool_init_index(pool, + rxe_type_info[type].max_index, + rxe_type_info[type].min_index); + if (err) + goto out; + } + + if (rxe_type_info[type].flags & RXE_POOL_KEY) { + pool->key_offset = rxe_type_info[type].key_offset; + pool->key_size = rxe_type_info[type].key_size; + } + + pool->state = rxe_pool_valid; + +out: + return err; +} + +static void rxe_pool_release(struct kref *kref) +{ + struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); + + pool->state = rxe_pool_invalid; + kfree(pool->table); +} + +static void rxe_pool_put(struct rxe_pool *pool) +{ + kref_put(&pool->ref_cnt, rxe_pool_release); +} + +int rxe_pool_cleanup(struct rxe_pool *pool) +{ + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + pool->state = rxe_pool_invalid; + if (atomic_read(&pool->num_elem) > 0) + pr_warn("%s pool destroyed with unfree'd elem\n", + pool_name(pool)); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + rxe_pool_put(pool); + + return 0; +} + +static u32 alloc_index(struct rxe_pool *pool) +{ + u32 index; + u32 range = pool->max_index - pool->min_index + 1; + + index = find_next_zero_bit(pool->table, range, pool->last); + if (index >= range) + index = find_first_zero_bit(pool->table, range); + + set_bit(index, pool->table); + pool->last = index; + return index + pool->min_index; +} + +static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct rxe_pool_entry *elem; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct rxe_pool_entry, node); + + if (elem->index == new->index) { + pr_warn("element already exists!\n"); + goto out; + } + + if (elem->index > new->index) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct rxe_pool_entry *elem; + int cmp; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct rxe_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + (u8 *)new + pool->key_offset, pool->key_size); + + if (cmp == 0) { + pr_warn("key already exists!\n"); + goto out; + } + + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +void rxe_add_key(void *arg, void *key) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + insert_key(pool, elem); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_drop_key(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + rb_erase(&elem->node, &pool->tree); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_add_index(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + elem->index = alloc_index(pool); + insert_index(pool, elem); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_drop_index(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + clear_bit(elem->index - pool->min_index, pool->table); + rb_erase(&elem->node, &pool->tree); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void *rxe_alloc(struct rxe_pool *pool) +{ + struct rxe_pool_entry *elem; + unsigned long flags; + + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + + spin_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != rxe_pool_valid) { + spin_unlock_irqrestore(&pool->pool_lock, flags); + return NULL; + } + kref_get(&pool->ref_cnt); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + kref_get(&pool->rxe->ref_cnt); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return NULL; + } + + elem = kmem_cache_zalloc(pool_cache(pool), + (pool->flags & RXE_POOL_ATOMIC) ? + GFP_ATOMIC : GFP_KERNEL); + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return elem; +} + +void rxe_elem_release(struct kref *kref) +{ + struct rxe_pool_entry *elem = + container_of(kref, struct rxe_pool_entry, ref_cnt); + struct rxe_pool *pool = elem->pool; + + if (pool->cleanup) + pool->cleanup(elem); + + kmem_cache_free(pool_cache(pool), elem); + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); +} + +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +{ + struct rb_node *node = NULL; + struct rxe_pool_entry *elem = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != rxe_pool_valid) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct rxe_pool_entry, node); + + if (elem->index > index) + node = node->rb_left; + else if (elem->index < index) + node = node->rb_right; + else + break; + } + + if (node) + kref_get(&elem->ref_cnt); + +out: + spin_unlock_irqrestore(&pool->pool_lock, flags); + return node ? (void *)elem : NULL; +} + +void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +{ + struct rb_node *node = NULL; + struct rxe_pool_entry *elem = NULL; + int cmp; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != rxe_pool_valid) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct rxe_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + key, pool->key_size); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; + } + + if (node) + kref_get(&elem->ref_cnt); + +out: + spin_unlock_irqrestore(&pool->pool_lock, flags); + return node ? ((void *)elem) : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h new file mode 100644 index 000000000000..4d04830adcae --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_POOL_H +#define RXE_POOL_H + +#define RXE_POOL_ALIGN (16) +#define RXE_POOL_CACHE_FLAGS (0) + +enum rxe_pool_flags { + RXE_POOL_ATOMIC = BIT(0), + RXE_POOL_INDEX = BIT(1), + RXE_POOL_KEY = BIT(2), +}; + +enum rxe_elem_type { + RXE_TYPE_UC, + RXE_TYPE_PD, + RXE_TYPE_AH, + RXE_TYPE_SRQ, + RXE_TYPE_QP, + RXE_TYPE_CQ, + RXE_TYPE_MR, + RXE_TYPE_MW, + RXE_TYPE_MC_GRP, + RXE_TYPE_MC_ELEM, + RXE_NUM_TYPES, /* keep me last */ +}; + +struct rxe_type_info { + char *name; + size_t size; + void (*cleanup)(void *obj); + enum rxe_pool_flags flags; + u32 max_index; + u32 min_index; + size_t key_offset; + size_t key_size; + struct kmem_cache *cache; +}; + +extern struct rxe_type_info rxe_type_info[]; + +enum rxe_pool_state { + rxe_pool_invalid, + rxe_pool_valid, +}; + +struct rxe_pool_entry { + struct rxe_pool *pool; + struct kref ref_cnt; + struct list_head list; + + /* only used if indexed or keyed */ + struct rb_node node; + u32 index; +}; + +struct rxe_pool { + struct rxe_dev *rxe; + spinlock_t pool_lock; /* pool spinlock */ + size_t elem_size; + struct kref ref_cnt; + void (*cleanup)(void *obj); + enum rxe_pool_state state; + enum rxe_pool_flags flags; + enum rxe_elem_type type; + + unsigned int max_elem; + atomic_t num_elem; + + /* only used if indexed or keyed */ + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 max_index; + u32 min_index; + u32 last; + size_t key_offset; + size_t key_size; +}; + +/* initialize slab caches for managed objects */ +int rxe_cache_init(void); + +/* cleanup slab caches for managed objects */ +void rxe_cache_exit(void); + +/* initialize a pool of objects with given limit on + * number of elements. gets parameters from rxe_type_info + * pool elements will be allocated out of a slab cache + */ +int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type, u32 max_elem); + +/* free resources from object pool */ +int rxe_pool_cleanup(struct rxe_pool *pool); + +/* allocate an object from pool */ +void *rxe_alloc(struct rxe_pool *pool); + +/* assign an index to an indexed object and insert object into + * pool's rb tree + */ +void rxe_add_index(void *elem); + +/* drop an index and remove object from rb tree */ +void rxe_drop_index(void *elem); + +/* assign a key to a keyed object and insert object into + * pool's rb tree + */ +void rxe_add_key(void *elem, void *key); + +/* remove elem from rb tree */ +void rxe_drop_key(void *elem); + +/* lookup an indexed object from index. takes a reference on object */ +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); + +/* lookup keyed object from key. takes a reference on the object */ +void *rxe_pool_get_key(struct rxe_pool *pool, void *key); + +/* cleanup an object when all references are dropped */ +void rxe_elem_release(struct kref *kref); + +/* take a reference on an object */ +#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt) + +/* drop a reference on an object */ +#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release) + +#endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c new file mode 100644 index 000000000000..22ba24f2a2c1 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> +#include <linux/delay.h> +#include <linux/sched.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +char *rxe_qp_state_name[] = { + [QP_STATE_RESET] = "RESET", + [QP_STATE_INIT] = "INIT", + [QP_STATE_READY] = "READY", + [QP_STATE_DRAIN] = "DRAIN", + [QP_STATE_DRAINED] = "DRAINED", + [QP_STATE_ERROR] = "ERROR", +}; + +static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, + int has_srq) +{ + if (cap->max_send_wr > rxe->attr.max_qp_wr) { + pr_warn("invalid send wr = %d > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); + goto err1; + } + + if (cap->max_send_sge > rxe->attr.max_sge) { + pr_warn("invalid send sge = %d > %d\n", + cap->max_send_sge, rxe->attr.max_sge); + goto err1; + } + + if (!has_srq) { + if (cap->max_recv_wr > rxe->attr.max_qp_wr) { + pr_warn("invalid recv wr = %d > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); + goto err1; + } + + if (cap->max_recv_sge > rxe->attr.max_sge) { + pr_warn("invalid recv sge = %d > %d\n", + cap->max_recv_sge, rxe->attr.max_sge); + goto err1; + } + } + + if (cap->max_inline_data > rxe->max_inline_data) { + pr_warn("invalid max inline data = %d > %d\n", + cap->max_inline_data, rxe->max_inline_data); + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) +{ + struct ib_qp_cap *cap = &init->cap; + struct rxe_port *port; + int port_num = init->port_num; + + if (!init->recv_cq || !init->send_cq) { + pr_warn("missing cq\n"); + goto err1; + } + + if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) + goto err1; + + if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (port_num != 1) { + pr_warn("invalid port = %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { + pr_warn("SMI QP exists for port %d\n", port_num); + goto err1; + } + + if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { + pr_warn("GSI QP exists for port %d\n", port_num); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n) +{ + qp->resp.res_head = 0; + qp->resp.res_tail = 0; + qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL); + + if (!qp->resp.resources) + return -ENOMEM; + + return 0; +} + +static void free_rd_atomic_resources(struct rxe_qp *qp) +{ + if (qp->resp.resources) { + int i; + + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + free_rd_atomic_resource(qp, res); + } + kfree(qp->resp.resources); + qp->resp.resources = NULL; + } +} + +void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) +{ + if (res->type == RXE_ATOMIC_MASK) { + rxe_drop_ref(qp); + kfree_skb(res->atomic.skb); + } else if (res->type == RXE_READ_MASK) { + if (res->read.mr) + rxe_drop_ref(res->read.mr); + } + res->type = 0; +} + +static void cleanup_rd_atomic_resources(struct rxe_qp *qp) +{ + int i; + struct resp_res *res; + + if (qp->resp.resources) { + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + res = &qp->resp.resources[i]; + free_rd_atomic_resource(qp, res); + } + } +} + +static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init) +{ + struct rxe_port *port; + u32 qpn; + + qp->sq_sig_type = init->sq_sig_type; + qp->attr.path_mtu = 1; + qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); + + qpn = qp->pelem.index; + port = &rxe->port; + + switch (init->qp_type) { + case IB_QPT_SMI: + qp->ibqp.qp_num = 0; + port->qp_smi_index = qpn; + qp->attr.port_num = init->port_num; + break; + + case IB_QPT_GSI: + qp->ibqp.qp_num = 1; + port->qp_gsi_index = qpn; + qp->attr.port_num = init->port_num; + break; + + default: + qp->ibqp.qp_num = qpn; + break; + } + + INIT_LIST_HEAD(&qp->grp_list); + + skb_queue_head_init(&qp->send_pkts); + + spin_lock_init(&qp->grp_lock); + spin_lock_init(&qp->state_lock); + + atomic_set(&qp->ssn, 0); + atomic_set(&qp->skb_out, 0); +} + +static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int wqe_size; + + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); + if (err < 0) + return err; + qp->sk->sk->sk_user_data = qp; + + qp->sq.max_wr = init->cap.max_send_wr; + qp->sq.max_sge = init->cap.max_send_sge; + qp->sq.max_inline = init->cap.max_inline_data; + + wqe_size = max_t(int, sizeof(struct rxe_send_wqe) + + qp->sq.max_sge * sizeof(struct ib_sge), + sizeof(struct rxe_send_wqe) + + qp->sq.max_inline); + + qp->sq.queue = rxe_queue_init(rxe, + &qp->sq.max_wr, + wqe_size); + if (!qp->sq.queue) + return -ENOMEM; + + err = do_mmap_info(rxe, udata, true, + context, qp->sq.queue->buf, + qp->sq.queue->buf_size, &qp->sq.queue->ip); + + if (err) { + kvfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + return err; + } + + qp->req.wqe_index = producer_index(qp->sq.queue); + qp->req.state = QP_STATE_RESET; + qp->req.opcode = -1; + qp->comp.opcode = -1; + + spin_lock_init(&qp->sq.sq_lock); + skb_queue_head_init(&qp->req_pkts); + + rxe_init_task(rxe, &qp->req.task, qp, + rxe_requester, "req"); + rxe_init_task(rxe, &qp->comp.task, qp, + rxe_completer, "comp"); + + init_timer(&qp->rnr_nak_timer); + qp->rnr_nak_timer.function = rnr_nak_timer; + qp->rnr_nak_timer.data = (unsigned long)qp; + + init_timer(&qp->retrans_timer); + qp->retrans_timer.function = retransmit_timer; + qp->retrans_timer.data = (unsigned long)qp; + qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ + + return 0; +} + +static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int wqe_size; + + if (!qp->srq) { + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + + wqe_size = rcv_wqe_size(qp->rq.max_sge); + + pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp->rq.max_wr, qp->rq.max_sge, wqe_size); + + qp->rq.queue = rxe_queue_init(rxe, + &qp->rq.max_wr, + wqe_size); + if (!qp->rq.queue) + return -ENOMEM; + + err = do_mmap_info(rxe, udata, false, context, + qp->rq.queue->buf, + qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + kvfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + return err; + } + } + + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); + + skb_queue_head_init(&qp->resp_pkts); + + rxe_init_task(rxe, &qp->resp.task, qp, + rxe_responder, "resp"); + + qp->resp.opcode = OPCODE_NONE; + qp->resp.msn = 0; + qp->resp.state = QP_STATE_RESET; + + return 0; +} + +/* called by the create qp verb */ +int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, + struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_pd *ibpd) +{ + int err; + struct rxe_cq *rcq = to_rcq(init->recv_cq); + struct rxe_cq *scq = to_rcq(init->send_cq); + struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + + rxe_add_ref(pd); + rxe_add_ref(rcq); + rxe_add_ref(scq); + if (srq) + rxe_add_ref(srq); + + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; + + rxe_qp_init_misc(rxe, qp, init); + + err = rxe_qp_init_req(rxe, qp, init, context, udata); + if (err) + goto err1; + + err = rxe_qp_init_resp(rxe, qp, init, context, udata); + if (err) + goto err2; + + qp->attr.qp_state = IB_QPS_RESET; + qp->valid = 1; + + return 0; + +err2: + rxe_queue_cleanup(qp->sq.queue); +err1: + if (srq) + rxe_drop_ref(srq); + rxe_drop_ref(scq); + rxe_drop_ref(rcq); + rxe_drop_ref(pd); + + return err; +} + +/* called by the query qp verb */ +int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init) +{ + init->event_handler = qp->ibqp.event_handler; + init->qp_context = qp->ibqp.qp_context; + init->send_cq = qp->ibqp.send_cq; + init->recv_cq = qp->ibqp.recv_cq; + init->srq = qp->ibqp.srq; + + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + init->cap.max_recv_wr = qp->rq.max_wr; + init->cap.max_recv_sge = qp->rq.max_sge; + } + + init->sq_sig_type = qp->sq_sig_type; + + init->qp_type = qp->ibqp.qp_type; + init->port_num = 1; + + return 0; +} + +/* called by the modify qp verb, this routine checks all the parameters before + * making any changes + */ +int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_attr *attr, int mask) +{ + enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->attr.qp_state; + enum ib_qp_state new_state = (mask & IB_QP_STATE) ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, + IB_LINK_LAYER_ETHERNET)) { + pr_warn("invalid mask or state for qp\n"); + goto err1; + } + + if (mask & IB_QP_STATE) { + if (cur_state == IB_QPS_SQD) { + if (qp->req.state == QP_STATE_DRAIN && + new_state != IB_QPS_ERR) + goto err1; + } + } + + if (mask & IB_QP_PORT) { + if (attr->port_num != 1) { + pr_warn("invalid port %d\n", attr->port_num); + goto err1; + } + } + + if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) + goto err1; + + if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) + goto err1; + + if (mask & IB_QP_ALT_PATH) { + if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + goto err1; + if (attr->alt_port_num != 1) { + pr_warn("invalid alt port %d\n", attr->alt_port_num); + goto err1; + } + if (attr->alt_timeout > 31) { + pr_warn("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); + goto err1; + } + } + + if (mask & IB_QP_PATH_MTU) { + struct rxe_port *port = &rxe->port; + + enum ib_mtu max_mtu = port->attr.max_mtu; + enum ib_mtu mtu = attr->path_mtu; + + if (mtu > max_mtu) { + pr_debug("invalid mtu (%d) > (%d)\n", + ib_mtu_enum_to_int(mtu), + ib_mtu_enum_to_int(max_mtu)); + goto err1; + } + } + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { + pr_warn("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); + goto err1; + } + } + + if (mask & IB_QP_TIMEOUT) { + if (attr->timeout > 31) { + pr_warn("invalid QP timeout %d > 31\n", + attr->timeout); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +/* move the qp to the reset state */ +static void rxe_qp_reset(struct rxe_qp *qp) +{ + /* stop tasks from running */ + rxe_disable_task(&qp->resp.task); + + /* stop request/comp */ + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + rxe_disable_task(&qp->comp.task); + rxe_disable_task(&qp->req.task); + } + + /* move qp to the reset state */ + qp->req.state = QP_STATE_RESET; + qp->resp.state = QP_STATE_RESET; + + /* let state machines reset themselves drain work and packet queues + * etc. + */ + __rxe_do_task(&qp->resp.task); + + if (qp->sq.queue) { + __rxe_do_task(&qp->comp.task); + __rxe_do_task(&qp->req.task); + } + + /* cleanup attributes */ + atomic_set(&qp->ssn, 0); + qp->req.opcode = -1; + qp->req.need_retry = 0; + qp->req.noack_pkts = 0; + qp->resp.msn = 0; + qp->resp.opcode = -1; + qp->resp.drop_msg = 0; + qp->resp.goto_error = 0; + qp->resp.sent_psn_nak = 0; + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + cleanup_rd_atomic_resources(qp); + + /* reenable tasks */ + rxe_enable_task(&qp->resp.task); + + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + rxe_enable_task(&qp->comp.task); + + rxe_enable_task(&qp->req.task); + } +} + +/* drain the send queue */ +static void rxe_qp_drain(struct rxe_qp *qp) +{ + if (qp->sq.queue) { + if (qp->req.state != QP_STATE_DRAINED) { + qp->req.state = QP_STATE_DRAIN; + if (qp_type(qp) == IB_QPT_RC) + rxe_run_task(&qp->comp.task, 1); + else + __rxe_do_task(&qp->comp.task); + rxe_run_task(&qp->req.task, 1); + } + } +} + +/* move the qp to the error state */ +void rxe_qp_error(struct rxe_qp *qp) +{ + qp->req.state = QP_STATE_ERROR; + qp->resp.state = QP_STATE_ERROR; + + /* drain work and packet queues */ + rxe_run_task(&qp->resp.task, 1); + + if (qp_type(qp) == IB_QPT_RC) + rxe_run_task(&qp->comp.task, 1); + else + __rxe_do_task(&qp->comp.task); + rxe_run_task(&qp->req.task, 1); +} + +/* called by the modify qp verb */ +int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + + free_rd_atomic_resources(qp); + + err = alloc_rd_atomic_resources(qp, max_rd_atomic); + if (err) + return err; + + qp->attr.max_rd_atomic = max_rd_atomic; + atomic_set(&qp->req.rd_atomic, max_rd_atomic); + } + + if (mask & IB_QP_CUR_STATE) + qp->attr.cur_qp_state = attr->qp_state; + + if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; + + if (mask & IB_QP_ACCESS_FLAGS) + qp->attr.qp_access_flags = attr->qp_access_flags; + + if (mask & IB_QP_PKEY_INDEX) + qp->attr.pkey_index = attr->pkey_index; + + if (mask & IB_QP_PORT) + qp->attr.port_num = attr->port_num; + + if (mask & IB_QP_QKEY) + qp->attr.qkey = attr->qkey; + + if (mask & IB_QP_AV) { + ib_get_cached_gid(&rxe->ib_dev, 1, + attr->ah_attr.grh.sgid_index, &sgid, + &sgid_attr); + rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av, + &attr->ah_attr); + rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr, + &sgid_attr, &sgid); + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + } + + if (mask & IB_QP_ALT_PATH) { + ib_get_cached_gid(&rxe->ib_dev, 1, + attr->alt_ah_attr.grh.sgid_index, &sgid, + &sgid_attr); + + rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av, + &attr->alt_ah_attr); + rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr, + &sgid_attr, &sgid); + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + + qp->attr.alt_port_num = attr->alt_port_num; + qp->attr.alt_pkey_index = attr->alt_pkey_index; + qp->attr.alt_timeout = attr->alt_timeout; + } + + if (mask & IB_QP_PATH_MTU) { + qp->attr.path_mtu = attr->path_mtu; + qp->mtu = ib_mtu_enum_to_int(attr->path_mtu); + } + + if (mask & IB_QP_TIMEOUT) { + qp->attr.timeout = attr->timeout; + if (attr->timeout == 0) { + qp->qp_timeout_jiffies = 0; + } else { + /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */ + int j = nsecs_to_jiffies(4096ULL << attr->timeout); + + qp->qp_timeout_jiffies = j ? j : 1; + } + } + + if (mask & IB_QP_RETRY_CNT) { + qp->attr.retry_cnt = attr->retry_cnt; + qp->comp.retry_cnt = attr->retry_cnt; + pr_debug("set retry count = %d\n", attr->retry_cnt); + } + + if (mask & IB_QP_RNR_RETRY) { + qp->attr.rnr_retry = attr->rnr_retry; + qp->comp.rnr_retry = attr->rnr_retry; + pr_debug("set rnr retry count = %d\n", attr->rnr_retry); + } + + if (mask & IB_QP_RQ_PSN) { + qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); + qp->resp.psn = qp->attr.rq_psn; + pr_debug("set resp psn = 0x%x\n", qp->resp.psn); + } + + if (mask & IB_QP_MIN_RNR_TIMER) { + qp->attr.min_rnr_timer = attr->min_rnr_timer; + pr_debug("set min rnr timer = 0x%x\n", + attr->min_rnr_timer); + } + + if (mask & IB_QP_SQ_PSN) { + qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); + qp->req.psn = qp->attr.sq_psn; + qp->comp.psn = qp->attr.sq_psn; + pr_debug("set req psn = 0x%x\n", qp->req.psn); + } + + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + qp->attr.max_dest_rd_atomic = + __roundup_pow_of_two(attr->max_dest_rd_atomic); + } + + if (mask & IB_QP_PATH_MIG_STATE) + qp->attr.path_mig_state = attr->path_mig_state; + + if (mask & IB_QP_DEST_QPN) + qp->attr.dest_qp_num = attr->dest_qp_num; + + if (mask & IB_QP_STATE) { + qp->attr.qp_state = attr->qp_state; + + switch (attr->qp_state) { + case IB_QPS_RESET: + pr_debug("qp state -> RESET\n"); + rxe_qp_reset(qp); + break; + + case IB_QPS_INIT: + pr_debug("qp state -> INIT\n"); + qp->req.state = QP_STATE_INIT; + qp->resp.state = QP_STATE_INIT; + break; + + case IB_QPS_RTR: + pr_debug("qp state -> RTR\n"); + qp->resp.state = QP_STATE_READY; + break; + + case IB_QPS_RTS: + pr_debug("qp state -> RTS\n"); + qp->req.state = QP_STATE_READY; + break; + + case IB_QPS_SQD: + pr_debug("qp state -> SQD\n"); + rxe_qp_drain(qp); + break; + + case IB_QPS_SQE: + pr_warn("qp state -> SQE !!?\n"); + /* Not possible from modify_qp. */ + break; + + case IB_QPS_ERR: + pr_debug("qp state -> ERR\n"); + rxe_qp_error(qp); + break; + } + } + + return 0; +} + +/* called by the query qp verb */ +int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + *attr = qp->attr; + + attr->rq_psn = qp->resp.psn; + attr->sq_psn = qp->req.psn; + + attr->cap.max_send_wr = qp->sq.max_wr; + attr->cap.max_send_sge = qp->sq.max_sge; + attr->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + attr->cap.max_recv_wr = qp->rq.max_wr; + attr->cap.max_recv_sge = qp->rq.max_sge; + } + + rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr); + rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr); + + if (qp->req.state == QP_STATE_DRAIN) { + attr->sq_draining = 1; + /* applications that get this state + * typically spin on it. yield the + * processor + */ + cond_resched(); + } else { + attr->sq_draining = 0; + } + + pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + + return 0; +} + +/* called by the destroy qp verb */ +void rxe_qp_destroy(struct rxe_qp *qp) +{ + qp->valid = 0; + qp->qp_timeout_jiffies = 0; + rxe_cleanup_task(&qp->resp.task); + + del_timer_sync(&qp->retrans_timer); + del_timer_sync(&qp->rnr_nak_timer); + + rxe_cleanup_task(&qp->req.task); + if (qp_type(qp) == IB_QPT_RC) + rxe_cleanup_task(&qp->comp.task); + + /* flush out any receive wr's or pending requests */ + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { + __rxe_do_task(&qp->comp.task); + __rxe_do_task(&qp->req.task); + } +} + +/* called when the last reference to the qp is dropped */ +void rxe_qp_cleanup(void *arg) +{ + struct rxe_qp *qp = arg; + + rxe_drop_all_mcast_groups(qp); + + if (qp->sq.queue) + rxe_queue_cleanup(qp->sq.queue); + + if (qp->srq) + rxe_drop_ref(qp->srq); + + if (qp->rq.queue) + rxe_queue_cleanup(qp->rq.queue); + + if (qp->scq) + rxe_drop_ref(qp->scq); + if (qp->rcq) + rxe_drop_ref(qp->rcq); + if (qp->pd) + rxe_drop_ref(qp->pd); + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + free_rd_atomic_resources(qp); + + kernel_sock_shutdown(qp->sk, SHUT_RDWR); +} diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c new file mode 100644 index 000000000000..08274254eb88 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must retailuce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/vmalloc.h> +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int do_mmap_info(struct rxe_dev *rxe, + struct ib_udata *udata, + bool is_req, + struct ib_ucontext *context, + struct rxe_queue_buf *buf, + size_t buf_size, + struct rxe_mmap_info **ip_p) +{ + int err; + u32 len, offset; + struct rxe_mmap_info *ip = NULL; + + if (udata) { + if (is_req) { + len = udata->outlen - sizeof(struct mminfo); + offset = sizeof(struct mminfo); + } else { + len = udata->outlen; + offset = 0; + } + + if (len < sizeof(ip->info)) + goto err1; + + ip = rxe_create_mmap_info(rxe, buf_size, context, buf); + if (!ip) + goto err1; + + err = copy_to_user(udata->outbuf + offset, &ip->info, + sizeof(ip->info)); + if (err) + goto err2; + + spin_lock_bh(&rxe->pending_lock); + list_add(&ip->pending_mmaps, &rxe->pending_mmaps); + spin_unlock_bh(&rxe->pending_lock); + } + + *ip_p = ip; + + return 0; + +err2: + kfree(ip); +err1: + return -EINVAL; +} + +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, + int *num_elem, + unsigned int elem_size) +{ + struct rxe_queue *q; + size_t buf_size; + unsigned int num_slots; + + /* num_elem == 0 is allowed, but uninteresting */ + if (*num_elem < 0) + goto err1; + + q = kmalloc(sizeof(*q), GFP_KERNEL); + if (!q) + goto err1; + + q->rxe = rxe; + + /* used in resize, only need to copy used part of queue */ + q->elem_size = elem_size; + + /* pad element up to at least a cacheline and always a power of 2 */ + if (elem_size < cache_line_size()) + elem_size = cache_line_size(); + elem_size = roundup_pow_of_two(elem_size); + + q->log2_elem_size = order_base_2(elem_size); + + num_slots = *num_elem + 1; + num_slots = roundup_pow_of_two(num_slots); + q->index_mask = num_slots - 1; + + buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size; + + q->buf = vmalloc_user(buf_size); + if (!q->buf) + goto err2; + + q->buf->log2_elem_size = q->log2_elem_size; + q->buf->index_mask = q->index_mask; + + q->buf_size = buf_size; + + *num_elem = num_slots - 1; + return q; + +err2: + kfree(q); +err1: + return NULL; +} + +/* copies elements from original q to new q and then swaps the contents of the + * two q headers. This is so that if anyone is holding a pointer to q it will + * still work + */ +static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, + unsigned int num_elem) +{ + if (!queue_empty(q) && (num_elem < queue_count(q))) + return -EINVAL; + + while (!queue_empty(q)) { + memcpy(producer_addr(new_q), consumer_addr(q), + new_q->elem_size); + advance_producer(new_q); + advance_consumer(q); + } + + swap(*q, *new_q); + + return 0; +} + +int rxe_queue_resize(struct rxe_queue *q, + unsigned int *num_elem_p, + unsigned int elem_size, + struct ib_ucontext *context, + struct ib_udata *udata, + spinlock_t *producer_lock, + spinlock_t *consumer_lock) +{ + struct rxe_queue *new_q; + unsigned int num_elem = *num_elem_p; + int err; + unsigned long flags = 0, flags1; + + new_q = rxe_queue_init(q->rxe, &num_elem, elem_size); + if (!new_q) + return -ENOMEM; + + err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf, + new_q->buf_size, &new_q->ip); + if (err) { + vfree(new_q->buf); + kfree(new_q); + goto err1; + } + + spin_lock_irqsave(consumer_lock, flags1); + + if (producer_lock) { + spin_lock_irqsave(producer_lock, flags); + err = resize_finish(q, new_q, num_elem); + spin_unlock_irqrestore(producer_lock, flags); + } else { + err = resize_finish(q, new_q, num_elem); + } + + spin_unlock_irqrestore(consumer_lock, flags1); + + rxe_queue_cleanup(new_q); /* new/old dep on err */ + if (err) + goto err1; + + *num_elem_p = num_elem; + return 0; + +err1: + return err; +} + +void rxe_queue_cleanup(struct rxe_queue *q) +{ + if (q->ip) + kref_put(&q->ip->ref, rxe_mmap_release); + else + vfree(q->buf); + + kfree(q); +} diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h new file mode 100644 index 000000000000..239fd609c31e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_QUEUE_H +#define RXE_QUEUE_H + +/* implements a simple circular buffer that can optionally be + * shared between user space and the kernel and can be resized + + * the requested element size is rounded up to a power of 2 + * and the number of elements in the buffer is also rounded + * up to a power of 2. Since the queue is empty when the + * producer and consumer indices match the maximum capacity + * of the queue is one less than the number of element slots + */ + +/* this data structure is shared between user space and kernel + * space for those cases where the queue is shared. It contains + * the producer and consumer indices. Is also contains a copy + * of the queue size parameters for user space to use but the + * kernel must use the parameters in the rxe_queue struct + * this MUST MATCH the corresponding librxe struct + * for performance reasons arrange to have producer and consumer + * pointers in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[0]; +}; + +struct rxe_queue { + struct rxe_dev *rxe; + struct rxe_queue_buf *buf; + struct rxe_mmap_info *ip; + size_t buf_size; + size_t elem_size; + unsigned int log2_elem_size; + unsigned int index_mask; +}; + +int do_mmap_info(struct rxe_dev *rxe, + struct ib_udata *udata, + bool is_req, + struct ib_ucontext *context, + struct rxe_queue_buf *buf, + size_t buf_size, + struct rxe_mmap_info **ip_p); + +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, + int *num_elem, + unsigned int elem_size); + +int rxe_queue_resize(struct rxe_queue *q, + unsigned int *num_elem_p, + unsigned int elem_size, + struct ib_ucontext *context, + struct ib_udata *udata, + /* Protect producers while resizing queue */ + spinlock_t *producer_lock, + /* Protect consumers while resizing queue */ + spinlock_t *consumer_lock); + +void rxe_queue_cleanup(struct rxe_queue *queue); + +static inline int next_index(struct rxe_queue *q, int index) +{ + return (index + 1) & q->buf->index_mask; +} + +static inline int queue_empty(struct rxe_queue *q) +{ + return ((q->buf->producer_index - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline int queue_full(struct rxe_queue *q) +{ + return ((q->buf->producer_index + 1 - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline void advance_producer(struct rxe_queue *q) +{ + q->buf->producer_index = (q->buf->producer_index + 1) + & q->index_mask; +} + +static inline void advance_consumer(struct rxe_queue *q) +{ + q->buf->consumer_index = (q->buf->consumer_index + 1) + & q->index_mask; +} + +static inline void *producer_addr(struct rxe_queue *q) +{ + return q->buf->data + ((q->buf->producer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline void *consumer_addr(struct rxe_queue *q) +{ + return q->buf->data + ((q->buf->consumer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline unsigned int producer_index(struct rxe_queue *q) +{ + return q->buf->producer_index; +} + +static inline unsigned int consumer_index(struct rxe_queue *q) +{ + return q->buf->consumer_index; +} + +static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) +{ + return q->buf->data + ((index & q->index_mask) + << q->buf->log2_elem_size); +} + +static inline unsigned int index_from_addr(const struct rxe_queue *q, + const void *addr) +{ + return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) + & q->index_mask; +} + +static inline unsigned int queue_count(const struct rxe_queue *q) +{ + return (q->buf->producer_index - q->buf->consumer_index) + & q->index_mask; +} + +static inline void *queue_head(struct rxe_queue *q) +{ + return queue_empty(q) ? NULL : consumer_addr(q); +} + +#endif /* RXE_QUEUE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c new file mode 100644 index 000000000000..3d464c23e08b --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" + +static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct rxe_qp *qp) +{ + if (unlikely(!qp->valid)) + goto err1; + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UC: + if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + default: + pr_warn_ratelimited("unsupported qp type\n"); + goto err1; + } + + if (pkt->mask & RXE_REQ_MASK) { + if (unlikely(qp->resp.state != QP_STATE_READY)) + goto err1; + } else if (unlikely(qp->req.state < QP_STATE_READY || + qp->req.state > QP_STATE_DRAINED)) { + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +static void set_bad_pkey_cntr(struct rxe_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.bad_pkey_cntr = min((u32)0xffff, + port->attr.bad_pkey_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static void set_qkey_viol_cntr(struct rxe_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.qkey_viol_cntr = min((u32)0xffff, + port->attr.qkey_viol_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + u32 qpn, struct rxe_qp *qp) +{ + int i; + int found_pkey = 0; + struct rxe_port *port = &rxe->port; + u16 pkey = bth_pkey(pkt); + + pkt->pkey_index = 0; + + if (qpn == 1) { + for (i = 0; i < port->attr.pkey_tbl_len; i++) { + if (pkey_match(pkey, port->pkey_tbl[i])) { + pkt->pkey_index = i; + found_pkey = 1; + break; + } + } + + if (!found_pkey) { + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; + } + } else if (qpn != 0) { + if (unlikely(!pkey_match(pkey, + port->pkey_tbl[qp->attr.pkey_index] + ))) { + pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; + } + pkt->pkey_index = qp->attr.pkey_index; + } + + if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && + qpn != 0 && pkt->mask) { + u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; + + if (unlikely(deth_qkey(pkt) != qkey)) { + pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", + deth_qkey(pkt), qkey, qpn); + set_qkey_viol_cntr(port); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct rxe_qp *qp) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) + goto done; + + if (unlikely(pkt->port_num != qp->attr.port_num)) { + pr_warn_ratelimited("port %d != qp port %d\n", + pkt->port_num, qp->attr.port_num); + goto err1; + } + + if (skb->protocol == htons(ETH_P_IP)) { + struct in_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; + + if (ip_hdr(skb)->daddr != saddr->s_addr) { + pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", + &ip_hdr(skb)->daddr, + &saddr->s_addr); + goto err1; + } + + if (ip_hdr(skb)->saddr != daddr->s_addr) { + pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", + &ip_hdr(skb)->saddr, + &daddr->s_addr); + goto err1; + } + + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; + + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { + pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", + &ipv6_hdr(skb)->daddr, saddr); + goto err1; + } + + if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { + pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err1; + } + } + +done: + return 0; + +err1: + return -EINVAL; +} + +static int hdr_check(struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = pkt->rxe; + struct rxe_port *port = &rxe->port; + struct rxe_qp *qp = NULL; + u32 qpn = bth_qpn(pkt); + int index; + int err; + + if (unlikely(bth_tver(pkt) != BTH_TVER)) { + pr_warn_ratelimited("bad tver\n"); + goto err1; + } + + if (qpn != IB_MULTICAST_QPN) { + index = (qpn == 0) ? port->qp_smi_index : + ((qpn == 1) ? port->qp_gsi_index : qpn); + qp = rxe_pool_get_index(&rxe->qp_pool, index); + if (unlikely(!qp)) { + pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + goto err1; + } + + err = check_type_state(rxe, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_addr(rxe, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_keys(rxe, pkt, qpn, qp); + if (unlikely(err)) + goto err2; + } else { + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { + pr_warn_ratelimited("no grh for mcast qpn\n"); + goto err1; + } + } + + pkt->qp = qp; + return 0; + +err2: + if (qp) + rxe_drop_ref(qp); +err1: + return -EINVAL; +} + +static inline void rxe_rcv_pkt(struct rxe_dev *rxe, + struct rxe_pkt_info *pkt, + struct sk_buff *skb) +{ + if (pkt->mask & RXE_REQ_MASK) + rxe_resp_queue_pkt(rxe, pkt->qp, skb); + else + rxe_comp_queue_pkt(rxe, pkt->qp, skb); +} + +static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) +{ + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + struct rxe_mc_grp *mcg; + struct sk_buff *skb_copy; + struct rxe_mc_elem *mce; + struct rxe_qp *qp; + union ib_gid dgid; + int err; + + if (skb->protocol == htons(ETH_P_IP)) + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); + + /* lookup mcast group corresponding to mgid, takes a ref */ + mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + if (!mcg) + goto err1; /* mcast group not registered */ + + spin_lock_bh(&mcg->mcg_lock); + + list_for_each_entry(mce, &mcg->qp_list, qp_list) { + qp = mce->qp; + pkt = SKB_TO_PKT(skb); + + /* validate qp for incoming packet */ + err = check_type_state(rxe, pkt, qp); + if (err) + continue; + + err = check_keys(rxe, pkt, bth_qpn(pkt), qp); + if (err) + continue; + + /* if *not* the last qp in the list + * make a copy of the skb to post to the next qp + */ + skb_copy = (mce->qp_list.next != &mcg->qp_list) ? + skb_clone(skb, GFP_KERNEL) : NULL; + + pkt->qp = qp; + rxe_add_ref(qp); + rxe_rcv_pkt(rxe, pkt, skb); + + skb = skb_copy; + if (!skb) + break; + } + + spin_unlock_bh(&mcg->mcg_lock); + + rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + +err1: + if (skb) + kfree_skb(skb); +} + +static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) +{ + union ib_gid dgid; + union ib_gid *pdgid; + u16 index; + + if (skb->protocol == htons(ETH_P_IP)) { + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + pdgid = &dgid; + } else { + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; + } + + return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, rxe->ndev, &index); +} + +/* rxe_rcv is called from the interface driver */ +int rxe_rcv(struct sk_buff *skb) +{ + int err; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + struct rxe_dev *rxe = pkt->rxe; + __be32 *icrcp; + u32 calc_icrc, pack_icrc; + + pkt->offset = 0; + + if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + goto drop; + + if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + pr_warn_ratelimited("failed matching dgid\n"); + goto drop; + } + + pkt->opcode = bth_opcode(pkt); + pkt->psn = bth_psn(pkt); + pkt->qp = NULL; + pkt->mask |= rxe_opcode[pkt->opcode].mask; + + if (unlikely(skb->len < header_size(pkt))) + goto drop; + + err = hdr_check(pkt); + if (unlikely(err)) + goto drop; + + /* Verify ICRC */ + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + pack_icrc = be32_to_cpu(*icrcp); + + calc_icrc = rxe_icrc_hdr(pkt, skb); + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); + calc_icrc = cpu_to_be32(~calc_icrc); + if (unlikely(calc_icrc != pack_icrc)) { + char saddr[sizeof(struct in6_addr)]; + + if (skb->protocol == htons(ETH_P_IPV6)) + sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IP)) + sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + else + sprintf(saddr, "unknown"); + + pr_warn_ratelimited("bad ICRC from %s\n", saddr); + goto drop; + } + + if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN)) + rxe_rcv_mcast_pkt(rxe, skb); + else + rxe_rcv_pkt(rxe, pkt, skb); + + return 0; + +drop: + if (pkt->qp) + rxe_drop_ref(pkt->qp); + + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(rxe_rcv); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c new file mode 100644 index 000000000000..33b2d9d77021 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -0,0 +1,726 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + unsigned opcode); + +static inline void retry_first_write_send(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + unsigned mask, int npsn) +{ + int i; + + for (i = 0; i < npsn; i++) { + int to_send = (wqe->dma.resid > qp->mtu) ? + qp->mtu : wqe->dma.resid; + + qp->req.opcode = next_opcode(qp, wqe, + wqe->wr.opcode); + + if (wqe->wr.send_flags & IB_SEND_INLINE) { + wqe->dma.resid -= to_send; + wqe->dma.sge_offset += to_send; + } else { + advance_dma_data(&wqe->dma, to_send); + } + if (mask & WR_WRITE_MASK) + wqe->iova += qp->mtu; + } +} + +static void req_retry(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe; + unsigned int wqe_index; + unsigned int mask; + int npsn; + int first = 1; + + wqe = queue_head(qp->sq.queue); + npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; + + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.psn = qp->comp.psn; + qp->req.opcode = -1; + + for (wqe_index = consumer_index(qp->sq.queue); + wqe_index != producer_index(qp->sq.queue); + wqe_index = next_index(qp->sq.queue, wqe_index)) { + wqe = addr_from_index(qp->sq.queue, wqe_index); + mask = wr_opcode_mask(wqe->wr.opcode, qp); + + if (wqe->state == wqe_state_posted) + break; + + if (wqe->state == wqe_state_done) + continue; + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + wqe->wr.wr.atomic.remote_addr : + (mask & WR_READ_OR_WRITE_MASK) ? + wqe->wr.wr.rdma.remote_addr : + 0; + + if (!first || (mask & WR_READ_MASK) == 0) { + wqe->dma.resid = wqe->dma.length; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + } + + if (first) { + first = 0; + + if (mask & WR_WRITE_OR_SEND_MASK) + retry_first_write_send(qp, wqe, mask, npsn); + + if (mask & WR_READ_MASK) + wqe->iova += npsn * qp->mtu; + } + + wqe->state = wqe_state_posted; + } +} + +void rnr_nak_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + pr_debug("rnr nak timer fired\n"); + rxe_run_task(&qp->req.task, 1); +} + +static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe = queue_head(qp->sq.queue); + unsigned long flags; + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* check to see if we are drained; + * state_lock used by requester and completer + */ + spin_lock_irqsave(&qp->state_lock, flags); + do { + if (qp->req.state != QP_STATE_DRAIN) { + /* comp just finished */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + if (wqe && ((qp->req.wqe_index != + consumer_index(qp->sq.queue)) || + (wqe->state != wqe_state_posted))) { + /* comp not done yet */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } while (0); + } + + if (qp->req.wqe_index == producer_index(qp->sq.queue)) + return NULL; + + wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index); + + if (unlikely((qp->req.state == QP_STATE_DRAIN || + qp->req.state == QP_STATE_DRAINED) && + (wqe->state != wqe_state_processing))) + return NULL; + + if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) && + (qp->req.wqe_index != consumer_index(qp->sq.queue)))) { + qp->req.wait_fence = 1; + return NULL; + } + + wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); + return wqe; +} + +static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_RDMA_READ: + return IB_OPCODE_RC_RDMA_READ_REQUEST; + + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_OPCODE_RC_COMPARE_SWAP; + + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_OPCODE_RC_FETCH_ADD; + + case IB_WR_SEND_WITH_INV: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_FIRST; + case IB_WR_REG_MR: + case IB_WR_LOCAL_INV: + return opcode; + } + + return -EINVAL; +} + +static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY : + IB_OPCODE_UC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_FIRST; + } + + return -EINVAL; +} + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + unsigned opcode) +{ + int fits = (wqe->dma.resid <= qp->mtu); + + switch (qp_type(qp)) { + case IB_QPT_RC: + return next_opcode_rc(qp, opcode, fits); + + case IB_QPT_UC: + return next_opcode_uc(qp, opcode, fits); + + case IB_QPT_SMI: + case IB_QPT_UD: + case IB_QPT_GSI: + switch (opcode) { + case IB_WR_SEND: + return IB_OPCODE_UD_SEND_ONLY; + + case IB_WR_SEND_WITH_IMM: + return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + } + break; + + default: + break; + } + + return -EINVAL; +} + +static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + int depth; + + if (wqe->has_rd_atomic) + return 0; + + qp->req.need_rd_atomic = 1; + depth = atomic_dec_return(&qp->req.rd_atomic); + + if (depth >= 0) { + qp->req.need_rd_atomic = 0; + wqe->has_rd_atomic = 1; + return 0; + } + + atomic_inc(&qp->req.rd_atomic); + return -EAGAIN; +} + +static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_port *port; + struct rxe_av *av; + + if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) + return qp->mtu; + + av = &wqe->av; + port = &rxe->port; + + return port->mtu_cap; +} + +static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + int opcode, int payload, + struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_port *port = &rxe->port; + struct sk_buff *skb; + struct rxe_send_wr *ibwr = &wqe->wr; + struct rxe_av *av; + int pad = (-payload) & 0x3; + int paylen; + int solicited; + u16 pkey; + u32 qp_num; + int ack_req; + + /* length from start of bth to end of icrc */ + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; + + /* pkt->hdr, rxe, port_num and mask are initialized in ifc + * layer + */ + pkt->opcode = opcode; + pkt->qp = qp; + pkt->psn = qp->req.psn; + pkt->mask = rxe_opcode[opcode].mask; + pkt->paylen = paylen; + pkt->offset = 0; + pkt->wqe = wqe; + + /* init skb */ + av = rxe_get_av(pkt); + skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt); + if (unlikely(!skb)) + return NULL; + + /* init bth */ + solicited = (ibwr->send_flags & IB_SEND_SOLICITED) && + (pkt->mask & RXE_END_MASK) && + ((pkt->mask & (RXE_SEND_MASK)) || + (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == + (RXE_WRITE_MASK | RXE_IMMDT_MASK)); + + pkey = (qp_type(qp) == IB_QPT_GSI) ? + port->pkey_tbl[ibwr->wr.ud.pkey_index] : + port->pkey_tbl[qp->attr.pkey_index]; + + qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : + qp->attr.dest_qp_num; + + ack_req = ((pkt->mask & RXE_END_MASK) || + (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); + if (ack_req) + qp->req.noack_pkts = 0; + + bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, + ack_req, pkt->psn); + + /* init optional headers */ + if (pkt->mask & RXE_RETH_MASK) { + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + reth_set_va(pkt, wqe->iova); + reth_set_len(pkt, wqe->dma.length); + } + + if (pkt->mask & RXE_IMMDT_MASK) + immdt_set_imm(pkt, ibwr->ex.imm_data); + + if (pkt->mask & RXE_IETH_MASK) + ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey); + + if (pkt->mask & RXE_ATMETH_MASK) { + atmeth_set_va(pkt, wqe->iova); + if (opcode == IB_OPCODE_RC_COMPARE_SWAP || + opcode == IB_OPCODE_RD_COMPARE_SWAP) { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); + atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); + } else { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add); + } + atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey); + } + + if (pkt->mask & RXE_DETH_MASK) { + if (qp->ibqp.qp_num == 1) + deth_set_qkey(pkt, GSI_QKEY); + else + deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey); + deth_set_sqp(pkt, qp->ibqp.qp_num); + } + + return skb; +} + +static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, struct sk_buff *skb, + int paylen) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u32 crc = 0; + u32 *p; + int err; + + err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc); + if (err) + return err; + + if (pkt->mask & RXE_WRITE_OR_SEND) { + if (wqe->wr.send_flags & IB_SEND_INLINE) { + u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; + + crc = crc32_le(crc, tmp, paylen); + + memcpy(payload_addr(pkt), tmp, paylen); + + wqe->dma.resid -= paylen; + wqe->dma.sge_offset += paylen; + } else { + err = copy_data(rxe, qp->pd, 0, &wqe->dma, + payload_addr(pkt), paylen, + from_mem_obj, + &crc); + if (err) + return err; + } + } + p = payload_addr(pkt) + paylen + bth_pad(pkt); + + *p = ~crc; + + return 0; +} + +static void update_wqe_state(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + enum wqe_state *prev_state) +{ + enum wqe_state prev_state_ = wqe->state; + + if (pkt->mask & RXE_END_MASK) { + if (qp_type(qp) == IB_QPT_RC) + wqe->state = wqe_state_pending; + } else { + wqe->state = wqe_state_processing; + } + + *prev_state = prev_state_; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + /* number of packets left to send including current one */ + int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; + + /* handle zero length packet case */ + if (num_pkt == 0) + num_pkt = 1; + + if (pkt->mask & RXE_START_MASK) { + wqe->first_psn = qp->req.psn; + wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK; + } + + if (pkt->mask & RXE_READ_MASK) + qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; + else + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + + qp->req.opcode = pkt->opcode; + + + if (pkt->mask & RXE_END_MASK) + qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + + qp->need_req_skb = 0; + + if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer)) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); +} + +int rxe_requester(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_pkt_info pkt; + struct sk_buff *skb; + struct rxe_send_wqe *wqe; + unsigned mask; + int payload; + int mtu; + int opcode; + int ret; + enum wqe_state prev_state; + +next_wqe: + if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + goto exit; + + if (unlikely(qp->req.state == QP_STATE_RESET)) { + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.opcode = -1; + qp->req.need_rd_atomic = 0; + qp->req.wait_psn = 0; + qp->req.need_retry = 0; + goto exit; + } + + if (unlikely(qp->req.need_retry)) { + req_retry(qp); + qp->req.need_retry = 0; + } + + wqe = req_next_wqe(qp); + if (unlikely(!wqe)) + goto exit; + + if (wqe->mask & WR_REG_MASK) { + if (wqe->wr.opcode == IB_WR_LOCAL_INV) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mem *rmr; + + rmr = rxe_pool_get_index(&rxe->mr_pool, + wqe->wr.ex.invalidate_rkey >> 8); + if (!rmr) { + pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); + wqe->state = wqe_state_error; + wqe->status = IB_WC_MW_BIND_ERR; + goto exit; + } + rmr->state = RXE_MEM_STATE_FREE; + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else if (wqe->wr.opcode == IB_WR_REG_MR) { + struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr); + + rmr->state = RXE_MEM_STATE_VALID; + rmr->access = wqe->wr.wr.reg.access; + rmr->lkey = wqe->wr.wr.reg.key; + rmr->rkey = wqe->wr.wr.reg.key; + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else { + goto exit; + } + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + goto next_wqe; + } + + if (unlikely(qp_type(qp) == IB_QPT_RC && + qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + qp->req.wait_psn = 1; + goto exit; + } + + /* Limit the number of inflight SKBs per QP */ + if (unlikely(atomic_read(&qp->skb_out) > + RXE_INFLIGHT_SKBS_PER_QP_HIGH)) { + qp->need_req_skb = 1; + goto exit; + } + + opcode = next_opcode(qp, wqe, wqe->wr.opcode); + if (unlikely(opcode < 0)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto exit; + } + + mask = rxe_opcode[opcode].mask; + if (unlikely(mask & RXE_READ_OR_ATOMIC)) { + if (check_init_depth(qp, wqe)) + goto exit; + } + + mtu = get_mtu(qp, wqe); + payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; + if (payload > mtu) { + if (qp_type(qp) == IB_QPT_UD) { + /* C10-93.1.1: If the total sum of all the buffer lengths specified for a + * UD message exceeds the MTU of the port as returned by QueryHCA, the CI + * shall not emit any packets for this message. Further, the CI shall not + * generate an error due to this condition. + */ + + /* fake a successful UD send */ + wqe->first_psn = qp->req.psn; + wqe->last_psn = qp->req.psn; + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + goto complete; + } + payload = mtu; + } + + skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + if (unlikely(!skb)) { + pr_err("Failed allocating skb\n"); + goto err; + } + + if (fill_packet(qp, wqe, &pkt, skb, payload)) { + pr_debug("Error during fill packet\n"); + goto err; + } + + update_wqe_state(qp, wqe, &pkt, &prev_state); + ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); + if (ret) { + qp->need_req_skb = 1; + kfree_skb(skb); + + wqe->state = prev_state; + + if (ret == -EAGAIN) { + rxe_run_task(&qp->req.task, 1); + goto exit; + } + + goto err; + } + + update_state(qp, wqe, &pkt, payload); + + goto next_wqe; + +err: + kfree_skb(skb); + wqe->status = IB_WC_LOC_PROT_ERR; + wqe->state = wqe_state_error; + +complete: + if (qp_type(qp) != IB_QPT_RC) { + while (rxe_completer(qp) == 0) + ; + } + + return 0; + +exit: + return -EAGAIN; +} diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c new file mode 100644 index 000000000000..ebb03b46e2ad --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -0,0 +1,1380 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +enum resp_states { + RESPST_NONE, + RESPST_GET_REQ, + RESPST_CHK_PSN, + RESPST_CHK_OP_SEQ, + RESPST_CHK_OP_VALID, + RESPST_CHK_RESOURCE, + RESPST_CHK_LENGTH, + RESPST_CHK_RKEY, + RESPST_EXECUTE, + RESPST_READ_REPLY, + RESPST_COMPLETE, + RESPST_ACKNOWLEDGE, + RESPST_CLEANUP, + RESPST_DUPLICATE_REQUEST, + RESPST_ERR_MALFORMED_WQE, + RESPST_ERR_UNSUPPORTED_OPCODE, + RESPST_ERR_MISALIGNED_ATOMIC, + RESPST_ERR_PSN_OUT_OF_SEQ, + RESPST_ERR_MISSING_OPCODE_FIRST, + RESPST_ERR_MISSING_OPCODE_LAST_C, + RESPST_ERR_MISSING_OPCODE_LAST_D1E, + RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, + RESPST_ERR_RNR, + RESPST_ERR_RKEY_VIOLATION, + RESPST_ERR_LENGTH, + RESPST_ERR_CQ_OVERFLOW, + RESPST_ERROR, + RESPST_RESET, + RESPST_DONE, + RESPST_EXIT, +}; + +static char *resp_state_name[] = { + [RESPST_NONE] = "NONE", + [RESPST_GET_REQ] = "GET_REQ", + [RESPST_CHK_PSN] = "CHK_PSN", + [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ", + [RESPST_CHK_OP_VALID] = "CHK_OP_VALID", + [RESPST_CHK_RESOURCE] = "CHK_RESOURCE", + [RESPST_CHK_LENGTH] = "CHK_LENGTH", + [RESPST_CHK_RKEY] = "CHK_RKEY", + [RESPST_EXECUTE] = "EXECUTE", + [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_COMPLETE] = "COMPLETE", + [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", + [RESPST_CLEANUP] = "CLEANUP", + [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST", + [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE", + [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE", + [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC", + [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ", + [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST", + [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C", + [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E", + [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", + [RESPST_ERR_RNR] = "ERR_RNR", + [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", + [RESPST_ERR_LENGTH] = "ERR_LENGTH", + [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", + [RESPST_ERROR] = "ERROR", + [RESPST_RESET] = "RESET", + [RESPST_DONE] = "DONE", + [RESPST_EXIT] = "EXIT", +}; + +/* rxe_recv calls here to add a request packet to the input queue */ +void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + skb_queue_tail(&qp->req_pkts, skb); + + must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || + (skb_queue_len(&qp->req_pkts) > 1); + + rxe_run_task(&qp->resp.task, must_sched); +} + +static inline enum resp_states get_req(struct rxe_qp *qp, + struct rxe_pkt_info **pkt_p) +{ + struct sk_buff *skb; + + if (qp->resp.state == QP_STATE_ERROR) { + skb = skb_dequeue(&qp->req_pkts); + if (skb) { + /* drain request packet queue */ + rxe_drop_ref(qp); + kfree_skb(skb); + return RESPST_GET_REQ; + } + + /* go drain recv wr queue */ + return RESPST_CHK_RESOURCE; + } + + skb = skb_peek(&qp->req_pkts); + if (!skb) + return RESPST_EXIT; + + *pkt_p = SKB_TO_PKT(skb); + + return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN; +} + +static enum resp_states check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + int diff = psn_compare(pkt->psn, qp->resp.psn); + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (diff > 0) { + if (qp->resp.sent_psn_nak) + return RESPST_CLEANUP; + + qp->resp.sent_psn_nak = 1; + return RESPST_ERR_PSN_OUT_OF_SEQ; + + } else if (diff < 0) { + return RESPST_DUPLICATE_REQUEST; + } + + if (qp->resp.sent_psn_nak) + qp->resp.sent_psn_nak = 0; + + break; + + case IB_QPT_UC: + if (qp->resp.drop_msg || diff != 0) { + if (pkt->mask & RXE_START_MASK) { + qp->resp.drop_msg = 0; + return RESPST_CHK_OP_SEQ; + } + + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + break; + default: + break; + } + + return RESPST_CHK_OP_SEQ; +} + +static enum resp_states check_op_seq(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + switch (qp->resp.opcode) { + case IB_OPCODE_RC_SEND_FIRST: + case IB_OPCODE_RC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + case IB_OPCODE_RC_RDMA_WRITE_FIRST: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_ERR_MISSING_OPCODE_FIRST; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + case IB_QPT_UC: + switch (qp->resp.opcode) { + case IB_OPCODE_UC_SEND_FIRST: + case IB_OPCODE_UC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + case IB_OPCODE_UC_RDMA_WRITE_FIRST: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + default: + return RESPST_CHK_OP_VALID; + } +} + +static enum resp_states check_op_valid(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & RXE_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + return RESPST_ERR_UNSUPPORTED_OPCODE; + } + + break; + + case IB_QPT_UC: + if ((pkt->mask & RXE_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + break; + + default: + WARN_ON(1); + break; + } + + return RESPST_CHK_RESOURCE; +} + +static enum resp_states get_srq_wqe(struct rxe_qp *qp) +{ + struct rxe_srq *srq = qp->srq; + struct rxe_queue *q = srq->rq.queue; + struct rxe_recv_wqe *wqe; + struct ib_event ev; + + if (srq->error) + return RESPST_ERR_RNR; + + spin_lock_bh(&srq->rq.consumer_lock); + + wqe = queue_head(q); + if (!wqe) { + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_ERR_RNR; + } + + /* note kernel and user space recv wqes have same size */ + memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe)); + + qp->resp.wqe = &qp->resp.srq_wqe.wqe; + advance_consumer(q); + + if (srq->limit && srq->ibsrq.event_handler && + (queue_count(q) < srq->limit)) { + srq->limit = 0; + goto event; + } + + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_CHK_LENGTH; + +event: + spin_unlock_bh(&srq->rq.consumer_lock); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context); + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_resource(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_srq *srq = qp->srq; + + if (qp->resp.state == QP_STATE_ERROR) { + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else if (!srq) { + qp->resp.wqe = queue_head(qp->rq.queue); + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_EXIT; + } + } else { + return RESPST_EXIT; + } + } + + if (pkt->mask & RXE_READ_OR_ATOMIC) { + /* it is the requesters job to not send + * too many read/atomic ops, we just + * recycle the responder resource queue + */ + if (likely(qp->attr.max_rd_atomic > 0)) + return RESPST_CHK_LENGTH; + else + return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; + } + + if (pkt->mask & RXE_RWR_MASK) { + if (srq) + return get_srq_wqe(qp); + + qp->resp.wqe = queue_head(qp->rq.queue); + return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; + } + + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + return RESPST_CHK_RKEY; + + case IB_QPT_UC: + return RESPST_CHK_RKEY; + + default: + return RESPST_CHK_RKEY; + } +} + +static enum resp_states check_rkey(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_mem *mem; + u64 va; + u32 rkey; + u32 resid; + u32 pktlen; + int mtu = qp->mtu; + enum resp_states state; + int access; + + if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { + if (pkt->mask & RXE_RETH_MASK) { + qp->resp.va = reth_va(pkt); + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + } + access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ + : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_ATOMIC_MASK) { + qp->resp.va = atmeth_va(pkt); + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); + access = IB_ACCESS_REMOTE_ATOMIC; + } else { + return RESPST_EXECUTE; + } + + va = qp->resp.va; + rkey = qp->resp.rkey; + resid = qp->resp.resid; + pktlen = payload_size(pkt); + + mem = lookup_mem(qp->pd, access, rkey, lookup_remote); + if (!mem) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err1; + } + + if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err1; + } + + if (mem_check_range(mem, va, resid)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err2; + } + + if (pkt->mask & RXE_WRITE_MASK) { + if (resid > mtu) { + if (pktlen != mtu || bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err2; + } + + resid = mtu; + } else { + if (pktlen != resid) { + state = RESPST_ERR_LENGTH; + goto err2; + } + if ((bth_pad(pkt) != (0x3 & (-resid)))) { + /* This case may not be exactly that + * but nothing else fits. + */ + state = RESPST_ERR_LENGTH; + goto err2; + } + } + } + + WARN_ON(qp->resp.mr); + + qp->resp.mr = mem; + return RESPST_EXECUTE; + +err2: + rxe_drop_ref(mem); +err1: + return state; +} + +static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, + int data_len) +{ + int err; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, + data_addr, data_len, to_mem_obj, NULL); + if (unlikely(err)) + return (err == -ENOSPC) ? RESPST_ERR_LENGTH + : RESPST_ERR_MALFORMED_WQE; + + return RESPST_NONE; +} + +static enum resp_states write_data_in(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + enum resp_states rc = RESPST_NONE; + int err; + int data_len = payload_size(pkt); + + err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), + data_len, to_mem_obj, NULL); + if (err) { + rc = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + qp->resp.va += data_len; + qp->resp.resid -= data_len; + +out: + return rc; +} + +/* Guarantee atomicity of atomic operations at the machine level. */ +static DEFINE_SPINLOCK(atomic_ops_lock); + +static enum resp_states process_atomic(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 iova = atmeth_va(pkt); + u64 *vaddr; + enum resp_states ret; + struct rxe_mem *mr = qp->resp.mr; + + if (mr->state != RXE_MEM_STATE_VALID) { + ret = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + vaddr = iova_to_vaddr(mr, iova, sizeof(u64)); + + /* check vaddr is 8 bytes aligned. */ + if (!vaddr || (uintptr_t)vaddr & 7) { + ret = RESPST_ERR_MISALIGNED_ATOMIC; + goto out; + } + + spin_lock_bh(&atomic_ops_lock); + + qp->resp.atomic_orig = *vaddr; + + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || + pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (*vaddr == atmeth_comp(pkt)) + *vaddr = atmeth_swap_add(pkt); + } else { + *vaddr += atmeth_swap_add(pkt); + } + + spin_unlock_bh(&atomic_ops_lock); + + ret = RESPST_NONE; +out: + return ret; +} + +static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_pkt_info *ack, + int opcode, + int payload, + u32 psn, + u8 syndrome, + u32 *crcp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct sk_buff *skb; + u32 crc = 0; + u32 *p; + int paylen; + int pad; + int err; + + /* + * allocate packet + */ + pad = (-payload) & 0x3; + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; + + skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack); + if (!skb) + return NULL; + + ack->qp = qp; + ack->opcode = opcode; + ack->mask = rxe_opcode[opcode].mask; + ack->offset = pkt->offset; + ack->paylen = paylen; + + /* fill in bth using the request packet headers */ + memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); + + bth_set_opcode(ack, opcode); + bth_set_qpn(ack, qp->attr.dest_qp_num); + bth_set_pad(ack, pad); + bth_set_se(ack, 0); + bth_set_psn(ack, psn); + bth_set_ack(ack, 0); + ack->psn = psn; + + if (ack->mask & RXE_AETH_MASK) { + aeth_set_syn(ack, syndrome); + aeth_set_msn(ack, qp->resp.msn); + } + + if (ack->mask & RXE_ATMACK_MASK) + atmack_set_orig(ack, qp->resp.atomic_orig); + + err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc); + if (err) { + kfree_skb(skb); + return NULL; + } + + if (crcp) { + /* CRC computation will be continued by the caller */ + *crcp = crc; + } else { + p = payload_addr(ack) + payload + bth_pad(ack); + *p = ~crc; + } + + return skb; +} + +/* RDMA read response. If res is not NULL, then we have a current RDMA request + * being processed or replayed. + */ +static enum resp_states read_reply(struct rxe_qp *qp, + struct rxe_pkt_info *req_pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + int mtu = qp->mtu; + enum resp_states state; + int payload; + int opcode; + int err; + struct resp_res *res = qp->resp.res; + u32 icrc; + u32 *p; + + if (!res) { + /* This is the first time we process that request. Get a + * resource + */ + res = &qp->resp.resources[qp->resp.res_head]; + + free_rd_atomic_resource(qp, res); + rxe_advance_resp_resource(qp); + + res->type = RXE_READ_MASK; + + res->read.va = qp->resp.va; + res->read.va_org = qp->resp.va; + + res->first_psn = req_pkt->psn; + res->last_psn = req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1; + res->cur_psn = req_pkt->psn; + + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + /* note res inherits the reference to mr from qp */ + res->read.mr = qp->resp.mr; + qp->resp.mr = NULL; + + qp->resp.res = res; + res->state = rdatm_res_state_new; + } + + if (res->state == rdatm_res_state_new) { + if (res->read.resid <= mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; + } else { + if (res->read.resid > mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST; + } + + res->state = rdatm_res_state_next; + + payload = min_t(int, res->read.resid, mtu); + + skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, + res->cur_psn, AETH_ACK_UNLIMITED, &icrc); + if (!skb) + return RESPST_ERR_RNR; + + err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), + payload, from_mem_obj, &icrc); + if (err) + pr_err("Failed copying memory\n"); + + p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); + *p = ~icrc; + + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + if (err) { + pr_err("Failed sending RDMA reply.\n"); + kfree_skb(skb); + return RESPST_ERR_RNR; + } + + res->read.va += payload; + res->read.resid -= payload; + res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK; + + if (res->read.resid > 0) { + state = RESPST_DONE; + } else { + qp->resp.res = NULL; + qp->resp.opcode = -1; + qp->resp.psn = res->cur_psn; + state = RESPST_CLEANUP; + } + + return state; +} + +/* Executes a new request. A retried request never reach that function (send + * and writes are discarded, and reads and atomics are retried elsewhere. + */ +static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + enum resp_states err; + + if (pkt->mask & RXE_SEND_MASK) { + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) { + union rdma_network_hdr hdr; + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(&hdr, 0, sizeof(hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); + + err = send_data_in(qp, &hdr, sizeof(hdr)); + if (err) + return err; + } + err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); + if (err) + return err; + } else if (pkt->mask & RXE_WRITE_MASK) { + err = write_data_in(qp, pkt); + if (err) + return err; + } else if (pkt->mask & RXE_READ_MASK) { + /* For RDMA Read we can increment the msn now. See C9-148. */ + qp->resp.msn++; + return RESPST_READ_REPLY; + } else if (pkt->mask & RXE_ATOMIC_MASK) { + err = process_atomic(qp, pkt); + if (err) + return err; + } else + /* Unreachable */ + WARN_ON(1); + + /* We successfully processed this new request. */ + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + if (pkt->mask & RXE_COMP_MASK) + return RESPST_COMPLETE; + else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static enum resp_states do_complete(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_cqe cqe; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + struct rxe_recv_wqe *wqe = qp->resp.wqe; + + if (unlikely(!wqe)) + return RESPST_CLEANUP; + + memset(&cqe, 0, sizeof(cqe)); + + wc->wr_id = wqe->wr_id; + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + + /* fields after status are not required for errors */ + if (wc->status == IB_WC_SUCCESS) { + wc->opcode = (pkt->mask & RXE_IMMDT_MASK && + pkt->mask & RXE_WRITE_MASK) ? + IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; + wc->vendor_err = 0; + wc->byte_len = wqe->dma.length - wqe->dma.resid; + + /* fields after byte_len are different between kernel and user + * space + */ + if (qp->rcq->is_user) { + uwc->wc_flags = IB_WC_GRH; + + if (pkt->mask & RXE_IMMDT_MASK) { + uwc->wc_flags |= IB_WC_WITH_IMM; + uwc->ex.imm_data = + (__u32 __force)immdt_imm(pkt); + } + + if (pkt->mask & RXE_IETH_MASK) { + uwc->wc_flags |= IB_WC_WITH_INVALIDATE; + uwc->ex.invalidate_rkey = ieth_rkey(pkt); + } + + uwc->qp_num = qp->ibqp.qp_num; + + if (pkt->mask & RXE_DETH_MASK) + uwc->src_qp = deth_sqp(pkt); + + uwc->port_num = qp->attr.port_num; + } else { + struct sk_buff *skb = PKT_TO_SKB(pkt); + + wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE; + if (skb->protocol == htons(ETH_P_IP)) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + + if (pkt->mask & RXE_IMMDT_MASK) { + wc->wc_flags |= IB_WC_WITH_IMM; + wc->ex.imm_data = immdt_imm(pkt); + } + + if (pkt->mask & RXE_IETH_MASK) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mem *rmr; + + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = ieth_rkey(pkt); + + rmr = rxe_pool_get_index(&rxe->mr_pool, + wc->ex.invalidate_rkey >> 8); + if (unlikely(!rmr)) { + pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); + return RESPST_ERROR; + } + rmr->state = RXE_MEM_STATE_FREE; + } + + wc->qp = &qp->ibqp; + + if (pkt->mask & RXE_DETH_MASK) + wc->src_qp = deth_sqp(pkt); + + wc->port_num = qp->attr.port_num; + } + } + + /* have copy for srq and reference for !srq */ + if (!qp->srq) + advance_consumer(qp->rq.queue); + + qp->resp.wqe = NULL; + + if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1)) + return RESPST_ERR_CQ_OVERFLOW; + + if (qp->resp.state == QP_STATE_ERROR) + return RESPST_CHK_RESOURCE; + + if (!pkt) + return RESPST_DONE; + else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + u8 syndrome, u32 psn) +{ + int err = 0; + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, + 0, psn, syndrome, NULL); + if (!skb) { + err = -ENOMEM; + goto err1; + } + + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + if (err) { + pr_err_ratelimited("Failed sending ack\n"); + kfree_skb(skb); + } + +err1: + return err; +} + +static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + u8 syndrome) +{ + int rc = 0; + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + struct sk_buff *skb_copy; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct resp_res *res; + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, + syndrome, NULL); + if (!skb) { + rc = -ENOMEM; + goto out; + } + + skb_copy = skb_clone(skb, GFP_ATOMIC); + if (skb_copy) + rxe_add_ref(qp); /* for the new SKB */ + else { + pr_warn("Could not clone atomic response\n"); + rc = -ENOMEM; + goto out; + } + + res = &qp->resp.resources[qp->resp.res_head]; + free_rd_atomic_resource(qp, res); + rxe_advance_resp_resource(qp); + + res->type = RXE_ATOMIC_MASK; + res->atomic.skb = skb; + res->first_psn = qp->resp.psn; + res->last_psn = qp->resp.psn; + res->cur_psn = qp->resp.psn; + + rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); + if (rc) { + pr_err_ratelimited("Failed sending ack\n"); + rxe_drop_ref(qp); + kfree_skb(skb_copy); + } + +out: + return rc; +} + +static enum resp_states acknowledge(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (qp_type(qp) != IB_QPT_RC) + return RESPST_CLEANUP; + + if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) + send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + else if (pkt->mask & RXE_ATOMIC_MASK) + send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + else if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + + return RESPST_CLEANUP; +} + +static enum resp_states cleanup(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb; + + if (pkt) { + skb = skb_dequeue(&qp->req_pkts); + rxe_drop_ref(qp); + kfree_skb(skb); + } + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_DONE; +} + +static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn) +{ + int i; + + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + if (res->type == 0) + continue; + + if (psn_compare(psn, res->first_psn) >= 0 && + psn_compare(psn, res->last_psn) <= 0) { + return res; + } + } + + return NULL; +} + +static enum resp_states duplicate_request(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + enum resp_states rc; + + if (pkt->mask & RXE_SEND_MASK || + pkt->mask & RXE_WRITE_MASK) { + /* SEND. Ack again and cleanup. C9-105. */ + if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1); + rc = RESPST_CLEANUP; + goto out; + } else if (pkt->mask & RXE_READ_MASK) { + struct resp_res *res; + + res = find_resource(qp, pkt->psn); + if (!res) { + /* Resource not found. Class D error. Drop the + * request. + */ + rc = RESPST_CLEANUP; + goto out; + } else { + /* Ensure this new request is the same as the previous + * one or a subset of it. + */ + u64 iova = reth_va(pkt); + u32 resid = reth_len(pkt); + + if (iova < res->read.va_org || + resid > res->read.length || + (iova + resid) > (res->read.va_org + + res->read.length)) { + rc = RESPST_CLEANUP; + goto out; + } + + if (reth_rkey(pkt) != res->read.rkey) { + rc = RESPST_CLEANUP; + goto out; + } + + res->cur_psn = pkt->psn; + res->state = (pkt->psn == res->first_psn) ? + rdatm_res_state_new : + rdatm_res_state_replay; + + /* Reset the resource, except length. */ + res->read.va_org = iova; + res->read.va = iova; + res->read.resid = resid; + + /* Replay the RDMA read reply. */ + qp->resp.res = res; + rc = RESPST_READ_REPLY; + goto out; + } + } else { + struct resp_res *res; + + /* Find the operation in our list of responder resources. */ + res = find_resource(qp, pkt->psn); + if (res) { + struct sk_buff *skb_copy; + + skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC); + if (skb_copy) { + rxe_add_ref(qp); /* for the new SKB */ + } else { + pr_warn("Couldn't clone atomic resp\n"); + rc = RESPST_CLEANUP; + goto out; + } + bth_set_psn(SKB_TO_PKT(skb_copy), + qp->resp.psn - 1); + /* Resend the result. */ + rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, + pkt, skb_copy); + if (rc) { + pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + kfree_skb(skb_copy); + rc = RESPST_CLEANUP; + goto out; + } + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; + } +out: + return rc; +} + +/* Process a class A or C. Both are treated the same in this implementation. */ +static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome, + enum ib_wc_status status) +{ + qp->resp.aeth_syndrome = syndrome; + qp->resp.status = status; + + /* indicate that we should go through the ERROR state */ + qp->resp.goto_error = 1; +} + +static enum resp_states do_class_d1e_error(struct rxe_qp *qp) +{ + /* UC */ + if (qp->srq) { + /* Class E */ + qp->resp.drop_msg = 1; + if (qp->resp.wqe) { + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_CLEANUP; + } + } else { + /* Class D1. This packet may be the start of a + * new message and could be valid. The previous + * message is invalid and ignored. reset the + * recv wr to its original state + */ + if (qp->resp.wqe) { + qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length; + qp->resp.wqe->dma.cur_sge = 0; + qp->resp.wqe->dma.sge_offset = 0; + qp->resp.opcode = -1; + } + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_CLEANUP; + } +} + +int rxe_responder(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + enum resp_states state; + struct rxe_pkt_info *pkt = NULL; + int ret = 0; + + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; + + if (!qp->valid) { + ret = -EINVAL; + goto done; + } + + switch (qp->resp.state) { + case QP_STATE_RESET: + state = RESPST_RESET; + break; + + default: + state = RESPST_GET_REQ; + break; + } + + while (1) { + pr_debug("state = %s\n", resp_state_name[state]); + switch (state) { + case RESPST_GET_REQ: + state = get_req(qp, &pkt); + break; + case RESPST_CHK_PSN: + state = check_psn(qp, pkt); + break; + case RESPST_CHK_OP_SEQ: + state = check_op_seq(qp, pkt); + break; + case RESPST_CHK_OP_VALID: + state = check_op_valid(qp, pkt); + break; + case RESPST_CHK_RESOURCE: + state = check_resource(qp, pkt); + break; + case RESPST_CHK_LENGTH: + state = check_length(qp, pkt); + break; + case RESPST_CHK_RKEY: + state = check_rkey(qp, pkt); + break; + case RESPST_EXECUTE: + state = execute(qp, pkt); + break; + case RESPST_COMPLETE: + state = do_complete(qp, pkt); + break; + case RESPST_READ_REPLY: + state = read_reply(qp, pkt); + break; + case RESPST_ACKNOWLEDGE: + state = acknowledge(qp, pkt); + break; + case RESPST_CLEANUP: + state = cleanup(qp, pkt); + break; + case RESPST_DUPLICATE_REQUEST: + state = duplicate_request(qp, pkt); + break; + case RESPST_ERR_PSN_OUT_OF_SEQ: + /* RC only - Class B. Drop packet. */ + send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ: + case RESPST_ERR_MISSING_OPCODE_FIRST: + case RESPST_ERR_MISSING_OPCODE_LAST_C: + case RESPST_ERR_UNSUPPORTED_OPCODE: + case RESPST_ERR_MISALIGNED_ATOMIC: + /* RC Only - Class C. */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_MISSING_OPCODE_LAST_D1E: + state = do_class_d1e_error(qp); + break; + case RESPST_ERR_RNR: + if (qp_type(qp) == IB_QPT_RC) { + /* RC - class B */ + send_ack(qp, pkt, AETH_RNR_NAK | + (~AETH_TYPE_MASK & + qp->attr.min_rnr_timer), + pkt->psn); + } else { + /* UD/UC - class D */ + qp->resp.drop_msg = 1; + } + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_RKEY_VIOLATION: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR, + IB_WC_REM_ACCESS_ERR); + state = RESPST_COMPLETE; + } else { + qp->resp.drop_msg = 1; + if (qp->srq) { + /* UC/SRQ Class D */ + qp->resp.status = IB_WC_REM_ACCESS_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/non-SRQ Class E. */ + state = RESPST_CLEANUP; + } + } + break; + + case RESPST_ERR_LENGTH: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + } else if (qp->srq) { + /* UC/UD - class E */ + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/UD - class D */ + qp->resp.drop_msg = 1; + state = RESPST_CLEANUP; + } + break; + + case RESPST_ERR_MALFORMED_WQE: + /* All, Class A. */ + do_class_ac_error(qp, AETH_NAK_REM_OP_ERR, + IB_WC_LOC_QP_OP_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_CQ_OVERFLOW: + /* All - Class G */ + state = RESPST_ERROR; + break; + + case RESPST_DONE: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto done; + + case RESPST_EXIT: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto exit; + + case RESPST_RESET: { + struct sk_buff *skb; + + while ((skb = skb_dequeue(&qp->req_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + + while (!qp->srq && qp->rq.queue && + queue_head(qp->rq.queue)) + advance_consumer(qp->rq.queue); + + qp->resp.wqe = NULL; + goto exit; + } + + case RESPST_ERROR: + qp->resp.goto_error = 0; + pr_warn("qp#%d moved to error state\n", qp_num(qp)); + rxe_qp_error(qp); + goto exit; + + default: + WARN_ON(1); + } + } + +exit: + ret = -EAGAIN; +done: + return ret; +} diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c new file mode 100644 index 000000000000..2a6e3cd2d4e8 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +{ + if (srq && srq->error) { + pr_warn("srq in error state\n"); + goto err1; + } + + if (mask & IB_SRQ_MAX_WR) { + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; + } + + if (srq && srq->limit && (attr->max_wr < srq->limit)) { + pr_warn("max_wr (%d) < srq->limit (%d)\n", + attr->max_wr, srq->limit); + goto err1; + } + + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; + } + + if (mask & IB_SRQ_LIMIT) { + if (attr->srq_limit > rxe->attr.max_srq_wr) { + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + attr->srq_limit, rxe->attr.max_srq_wr); + goto err1; + } + + if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { + pr_warn("srq_limit (%d) > cur limit(%d)\n", + attr->srq_limit, + srq->rq.queue->buf->index_mask); + goto err1; + } + } + + if (mask == IB_SRQ_INIT_MASK) { + if (attr->max_sge > rxe->attr.max_srq_sge) { + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + attr->max_sge, rxe->attr.max_srq_sge); + goto err1; + } + + if (attr->max_sge < RXE_MIN_SRQ_SGE) + attr->max_sge = RXE_MIN_SRQ_SGE; + } + + return 0; + +err1: + return -EINVAL; +} + +int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int srq_wqe_size; + struct rxe_queue *q; + + srq->ibsrq.event_handler = init->event_handler; + srq->ibsrq.srq_context = init->srq_context; + srq->limit = init->attr.srq_limit; + srq->srq_num = srq->pelem.index; + srq->rq.max_wr = init->attr.max_wr; + srq->rq.max_sge = init->attr.max_sge; + + srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); + + spin_lock_init(&srq->rq.producer_lock); + spin_lock_init(&srq->rq.consumer_lock); + + q = rxe_queue_init(rxe, &srq->rq.max_wr, + srq_wqe_size); + if (!q) { + pr_warn("unable to allocate queue for srq\n"); + return -ENOMEM; + } + + srq->rq.queue = q; + + err = do_mmap_info(rxe, udata, false, context, q->buf, + q->buf_size, &q->ip); + if (err) + return err; + + if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) { + if (copy_to_user(udata->outbuf + sizeof(struct mminfo), + &srq->srq_num, sizeof(u32))) + return -EFAULT; + } + return 0; +} + +int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, + struct ib_udata *udata) +{ + int err; + struct rxe_queue *q = srq->rq.queue; + struct mminfo mi = { .offset = 1, .size = 0}; + + if (mask & IB_SRQ_MAX_WR) { + /* Check that we can write the mminfo struct to user space */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 mi_addr; + + /* Get address of user space mminfo struct */ + err = ib_copy_from_udata(&mi_addr, udata, + sizeof(mi_addr)); + if (err) + goto err1; + + udata->outbuf = (void __user *)(unsigned long)mi_addr; + udata->outlen = sizeof(mi); + + if (!access_ok(VERIFY_WRITE, + (void __user *)udata->outbuf, + udata->outlen)) { + err = -EFAULT; + goto err1; + } + } + + err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + rcv_wqe_size(srq->rq.max_sge), + srq->rq.queue->ip ? + srq->rq.queue->ip->context : + NULL, + udata, &srq->rq.producer_lock, + &srq->rq.consumer_lock); + if (err) + goto err2; + } + + if (mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + + return 0; + +err2: + rxe_queue_cleanup(q); + srq->rq.queue = NULL; +err1: + return err; +} diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c new file mode 100644 index 000000000000..cf8e77800046 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_net.h" + +/* Copy argument and remove trailing CR. Return the new length. */ +static int sanitize_arg(const char *val, char *intf, int intf_len) +{ + int len; + + if (!val) + return 0; + + /* Remove newline. */ + for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++) + intf[len] = val[len]; + intf[len] = 0; + + if (len == 0 || (val[len] != 0 && val[len] != '\n')) + return 0; + + return len; +} + +static void rxe_set_port_state(struct net_device *ndev) +{ + struct rxe_dev *rxe = net_to_rxe(ndev); + bool is_up = netif_running(ndev) && netif_carrier_ok(ndev); + + if (!rxe) + goto out; + + if (is_up) + rxe_port_up(rxe); + else + rxe_port_down(rxe); /* down for unknown state */ +out: + return; +} + +static int rxe_param_set_add(const char *val, const struct kernel_param *kp) +{ + int len; + int err = 0; + char intf[32]; + struct net_device *ndev = NULL; + struct rxe_dev *rxe; + + len = sanitize_arg(val, intf, sizeof(intf)); + if (!len) { + pr_err("rxe: add: invalid interface name\n"); + err = -EINVAL; + goto err; + } + + ndev = dev_get_by_name(&init_net, intf); + if (!ndev) { + pr_err("interface %s not found\n", intf); + err = -EINVAL; + goto err; + } + + if (net_to_rxe(ndev)) { + pr_err("rxe: already configured on %s\n", intf); + err = -EINVAL; + goto err; + } + + rxe = rxe_net_add(ndev); + if (!rxe) { + pr_err("rxe: failed to add %s\n", intf); + err = -EINVAL; + goto err; + } + + rxe_set_port_state(ndev); + pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); +err: + if (ndev) + dev_put(ndev); + return err; +} + +static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) +{ + int len; + char intf[32]; + struct rxe_dev *rxe; + + len = sanitize_arg(val, intf, sizeof(intf)); + if (!len) { + pr_err("rxe: add: invalid interface name\n"); + return -EINVAL; + } + + if (strncmp("all", intf, len) == 0) { + pr_info("rxe_sys: remove all"); + rxe_remove_all(); + return 0; + } + + rxe = get_rxe_by_name(intf); + + if (!rxe) { + pr_err("rxe: not configured on %s\n", intf); + return -EINVAL; + } + + list_del(&rxe->list); + rxe_remove(rxe); + + return 0; +} + +static const struct kernel_param_ops rxe_add_ops = { + .set = rxe_param_set_add, +}; + +static const struct kernel_param_ops rxe_remove_ops = { + .set = rxe_param_set_remove, +}; + +module_param_cb(add, &rxe_add_ops, NULL, 0200); +MODULE_PARM_DESC(add, "Create RXE device over network interface"); +module_param_cb(remove, &rxe_remove_ops, NULL, 0200); +MODULE_PARM_DESC(remove, "Remove RXE device over network interface"); diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c new file mode 100644 index 000000000000..1e19bf828a6e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/hardirq.h> + +#include "rxe_task.h" + +int __rxe_do_task(struct rxe_task *task) + +{ + int ret; + + while ((ret = task->func(task->arg)) == 0) + ; + + task->ret = ret; + + return ret; +} + +/* + * this locking is due to a potential race where + * a second caller finds the task already running + * but looks just after the last call to func + */ +void rxe_do_task(unsigned long data) +{ + int cont; + int ret; + unsigned long flags; + struct rxe_task *task = (struct rxe_task *)data; + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_START: + task->state = TASK_STATE_BUSY; + spin_unlock_irqrestore(&task->state_lock, flags); + break; + + case TASK_STATE_BUSY: + task->state = TASK_STATE_ARMED; + /* fall through to */ + case TASK_STATE_ARMED: + spin_unlock_irqrestore(&task->state_lock, flags); + return; + + default: + spin_unlock_irqrestore(&task->state_lock, flags); + pr_warn("bad state = %d in rxe_do_task\n", task->state); + return; + } + + do { + cont = 0; + ret = task->func(task->arg); + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_BUSY: + if (ret) + task->state = TASK_STATE_START; + else + cont = 1; + break; + + /* soneone tried to run the task since the last time we called + * func, so we will call one more time regardless of the + * return value + */ + case TASK_STATE_ARMED: + task->state = TASK_STATE_BUSY; + cont = 1; + break; + + default: + pr_warn("bad state = %d in rxe_do_task\n", + task->state); + } + spin_unlock_irqrestore(&task->state_lock, flags); + } while (cont); + + task->ret = ret; +} + +int rxe_init_task(void *obj, struct rxe_task *task, + void *arg, int (*func)(void *), char *name) +{ + task->obj = obj; + task->arg = arg; + task->func = func; + snprintf(task->name, sizeof(task->name), "%s", name); + + tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); + + task->state = TASK_STATE_START; + spin_lock_init(&task->state_lock); + + return 0; +} + +void rxe_cleanup_task(struct rxe_task *task) +{ + tasklet_kill(&task->tasklet); +} + +void rxe_run_task(struct rxe_task *task, int sched) +{ + if (sched) + tasklet_schedule(&task->tasklet); + else + rxe_do_task((unsigned long)task); +} + +void rxe_disable_task(struct rxe_task *task) +{ + tasklet_disable(&task->tasklet); +} + +void rxe_enable_task(struct rxe_task *task) +{ + tasklet_enable(&task->tasklet); +} diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h new file mode 100644 index 000000000000..d14aa6daed05 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_TASK_H +#define RXE_TASK_H + +enum { + TASK_STATE_START = 0, + TASK_STATE_BUSY = 1, + TASK_STATE_ARMED = 2, +}; + +/* + * data structure to describe a 'task' which is a short + * function that returns 0 as long as it needs to be + * called again. + */ +struct rxe_task { + void *obj; + struct tasklet_struct tasklet; + int state; + spinlock_t state_lock; /* spinlock for task state */ + void *arg; + int (*func)(void *arg); + int ret; + char name[16]; +}; + +/* + * init rxe_task structure + * arg => parameter to pass to fcn + * fcn => function to call until it returns != 0 + */ +int rxe_init_task(void *obj, struct rxe_task *task, + void *arg, int (*func)(void *), char *name); + +/* cleanup task */ +void rxe_cleanup_task(struct rxe_task *task); + +/* + * raw call to func in loop without any checking + * can call when tasklets are disabled + */ +int __rxe_do_task(struct rxe_task *task); + +/* + * common function called by any of the main tasklets + * If there is any chance that there is additional + * work to do someone must reschedule the task before + * leaving + */ +void rxe_do_task(unsigned long data); + +/* run a task, else schedule it to run as a tasklet, The decision + * to run or schedule tasklet is based on the parameter sched. + */ +void rxe_run_task(struct rxe_task *task, int sched); + +/* keep a task from scheduling */ +void rxe_disable_task(struct rxe_task *task); + +/* allow task to run */ +void rxe_enable_task(struct rxe_task *task); + +#endif /* RXE_TASK_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c new file mode 100644 index 000000000000..4552be960c6a --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -0,0 +1,1330 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +static int rxe_query_device(struct ib_device *dev, + struct ib_device_attr *attr, + struct ib_udata *uhw) +{ + struct rxe_dev *rxe = to_rdev(dev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + *attr = rxe->attr; + return 0; +} + +static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed, + u8 *active_width) +{ + if (speed <= 1000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_SDR; + } else if (speed <= 10000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_FDR10; + } else if (speed <= 20000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_DDR; + } else if (speed <= 30000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + } else if (speed <= 40000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_FDR10; + } else { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + } +} + +static int rxe_query_port(struct ib_device *dev, + u8 port_num, struct ib_port_attr *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_port *port; + u32 speed; + + if (unlikely(port_num != 1)) { + pr_warn("invalid port_number %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + *attr = port->attr; + + mutex_lock(&rxe->usdev_lock); + if (rxe->ndev->ethtool_ops->get_link_ksettings) { + struct ethtool_link_ksettings ks; + + rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks); + speed = ks.base.speed; + } else if (rxe->ndev->ethtool_ops->get_settings) { + struct ethtool_cmd cmd; + + rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); + speed = cmd.speed; + } else { + pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); + speed = 1000; + } + rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); + mutex_unlock(&rxe->usdev_lock); + + return 0; + +err1: + return -EINVAL; +} + +static int rxe_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid) +{ + int ret; + + if (index > RXE_PORT_GID_TBL_LEN) + return -EINVAL; + + ret = ib_get_cached_gid(device, port_num, index, gid, NULL); + if (ret == -EAGAIN) { + memcpy(gid, &zgid, sizeof(*gid)); + return 0; + } + + return ret; +} + +static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int + index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context) +{ + if (index >= RXE_PORT_GID_TBL_LEN) + return -EINVAL; + return 0; +} + +static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int + index, void **context) +{ + if (index >= RXE_PORT_GID_TBL_LEN) + return -EINVAL; + return 0; +} + +static struct net_device *rxe_get_netdev(struct ib_device *device, + u8 port_num) +{ + struct rxe_dev *rxe = to_rdev(device); + + if (rxe->ndev) { + dev_hold(rxe->ndev); + return rxe->ndev; + } + + return NULL; +} + +static int rxe_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey) +{ + struct rxe_dev *rxe = to_rdev(device); + struct rxe_port *port; + + if (unlikely(port_num != 1)) { + dev_warn(device->dma_device, "invalid port_num = %d\n", + port_num); + goto err1; + } + + port = &rxe->port; + + if (unlikely(index >= port->attr.pkey_tbl_len)) { + dev_warn(device->dma_device, "invalid index = %d\n", + index); + goto err1; + } + + *pkey = port->pkey_tbl[index]; + return 0; + +err1: + return -EINVAL; +} + +static int rxe_modify_device(struct ib_device *dev, + int mask, struct ib_device_modify *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) + rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + memcpy(rxe->ib_dev.node_desc, + attr->node_desc, sizeof(rxe->ib_dev.node_desc)); + } + + return 0; +} + +static int rxe_modify_port(struct ib_device *dev, + u8 port_num, int mask, struct ib_port_modify *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_port *port; + + if (unlikely(port_num != 1)) { + pr_warn("invalid port_num = %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + port->attr.port_cap_flags |= attr->set_port_cap_mask; + port->attr.port_cap_flags &= ~attr->clr_port_cap_mask; + + if (mask & IB_PORT_RESET_QKEY_CNTR) + port->attr.qkey_viol_cntr = 0; + + return 0; + +err1: + return -EINVAL; +} + +static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, + u8 port_num) +{ + struct rxe_dev *rxe = to_rdev(dev); + + return rxe->ifc_ops->link_layer(rxe, port_num); +} + +static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev, + struct ib_udata *udata) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_ucontext *uc; + + uc = rxe_alloc(&rxe->uc_pool); + return uc ? &uc->ibuc : ERR_PTR(-ENOMEM); +} + +static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc) +{ + struct rxe_ucontext *uc = to_ruc(ibuc); + + rxe_drop_ref(uc); + return 0; +} + +static int rxe_port_immutable(struct ib_device *dev, u8 port_num, + struct ib_port_immutable *immutable) +{ + int err; + struct ib_port_attr attr; + + err = rxe_query_port(dev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static struct ib_pd *rxe_alloc_pd(struct ib_device *dev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_pd *pd; + + pd = rxe_alloc(&rxe->pd_pool); + return pd ? &pd->ibpd : ERR_PTR(-ENOMEM); +} + +static int rxe_dealloc_pd(struct ib_pd *ibpd) +{ + struct rxe_pd *pd = to_rpd(ibpd); + + rxe_drop_ref(pd); + return 0; +} + +static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, + struct rxe_av *av) +{ + int err; + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + + err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num, + attr->grh.sgid_index, &sgid, + &sgid_attr); + if (err) { + pr_err("Failed to query sgid. err = %d\n", err); + return err; + } + + err = rxe_av_from_attr(rxe, attr->port_num, av, attr); + if (!err) + err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid); + + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + return err; +} + +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_ah *ah; + + err = rxe_av_chk_attr(rxe, attr); + if (err) + goto err1; + + ah = rxe_alloc(&rxe->ah_pool); + if (!ah) { + err = -ENOMEM; + goto err1; + } + + rxe_add_ref(pd); + ah->pd = pd; + + err = rxe_init_av(rxe, attr, &ah->av); + if (err) + goto err2; + + return &ah->ibah; + +err2: + rxe_drop_ref(pd); + rxe_drop_ref(ah); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); + + err = rxe_av_chk_attr(rxe, attr); + if (err) + return err; + + err = rxe_init_av(rxe, attr, &ah->av); + if (err) + return err; + + return 0; +} + +static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); + + rxe_av_to_attr(rxe, &ah->av, attr); + return 0; +} + +static int rxe_destroy_ah(struct ib_ah *ibah) +{ + struct rxe_ah *ah = to_rah(ibah); + + rxe_drop_ref(ah->pd); + rxe_drop_ref(ah); + return 0; +} + +static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) +{ + int err; + int i; + u32 length; + struct rxe_recv_wqe *recv_wqe; + int num_sge = ibwr->num_sge; + + if (unlikely(queue_full(rq->queue))) { + err = -ENOMEM; + goto err1; + } + + if (unlikely(num_sge > rq->max_sge)) { + err = -EINVAL; + goto err1; + } + + length = 0; + for (i = 0; i < num_sge; i++) + length += ibwr->sg_list[i].length; + + recv_wqe = producer_addr(rq->queue); + recv_wqe->wr_id = ibwr->wr_id; + recv_wqe->num_sge = num_sge; + + memcpy(recv_wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + recv_wqe->dma.length = length; + recv_wqe->dma.resid = length; + recv_wqe->dma.num_sge = num_sge; + recv_wqe->dma.cur_sge = 0; + recv_wqe->dma.sge_offset = 0; + + /* make sure all changes to the work queue are written before we + * update the producer pointer + */ + smp_wmb(); + + advance_producer(rq->queue); + return 0; + +err1: + return err; +} + +static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_srq *srq; + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + + err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); + if (err) + goto err1; + + srq = rxe_alloc(&rxe->srq_pool); + if (!srq) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(srq); + rxe_add_ref(pd); + srq->pd = pd; + + err = rxe_srq_from_init(rxe, srq, init, context, udata); + if (err) + goto err2; + + return &srq->ibsrq; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(srq); + rxe_drop_ref(srq); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask mask, + struct ib_udata *udata) +{ + int err; + struct rxe_srq *srq = to_rsrq(ibsrq); + struct rxe_dev *rxe = to_rdev(ibsrq->device); + + err = rxe_srq_chk_attr(rxe, srq, attr, mask); + if (err) + goto err1; + + err = rxe_srq_from_attr(rxe, srq, attr, mask, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct rxe_srq *srq = to_rsrq(ibsrq); + + if (srq->error) + return -EINVAL; + + attr->max_wr = srq->rq.queue->buf->index_mask; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; +} + +static int rxe_destroy_srq(struct ib_srq *ibsrq) +{ + struct rxe_srq *srq = to_rsrq(ibsrq); + + if (srq->rq.queue) + rxe_queue_cleanup(srq->rq.queue); + + rxe_drop_ref(srq->pd); + rxe_drop_index(srq); + rxe_drop_ref(srq); + + return 0; +} + +static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + unsigned long flags; + struct rxe_srq *srq = to_rsrq(ibsrq); + + spin_lock_irqsave(&srq->rq.producer_lock, flags); + + while (wr) { + err = post_one_recv(&srq->rq, wr); + if (unlikely(err)) + break; + wr = wr->next; + } + + spin_unlock_irqrestore(&srq->rq.producer_lock, flags); + + if (err) + *bad_wr = wr; + + return err; +} + +static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_qp *qp; + + err = rxe_qp_chk_init(rxe, init); + if (err) + goto err1; + + qp = rxe_alloc(&rxe->qp_pool); + if (!qp) { + err = -ENOMEM; + goto err1; + } + + if (udata) { + if (udata->inlen) { + err = -EINVAL; + goto err1; + } + qp->is_user = 1; + } + + rxe_add_index(qp); + + err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); + if (err) + goto err2; + + return &qp->ibqp; + +err2: + rxe_drop_index(qp); + rxe_drop_ref(qp); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + + err = rxe_qp_chk_attr(rxe, qp, attr, mask); + if (err) + goto err1; + + err = rxe_qp_from_attr(qp, attr, mask, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_qp_init_attr *init) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + rxe_qp_to_init(qp, init); + rxe_qp_to_attr(qp, attr, mask); + + return 0; +} + +static int rxe_destroy_qp(struct ib_qp *ibqp) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + rxe_qp_destroy(qp); + rxe_drop_index(qp); + rxe_drop_ref(qp); + return 0; +} + +static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length) +{ + int num_sge = ibwr->num_sge; + struct rxe_sq *sq = &qp->sq; + + if (unlikely(num_sge > sq->max_sge)) + goto err1; + + if (unlikely(mask & WR_ATOMIC_MASK)) { + if (length < 8) + goto err1; + + if (atomic_wr(ibwr)->remote_addr & 0x7) + goto err1; + } + + if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && + (length > sq->max_inline))) + goto err1; + + return 0; + +err1: + return -EINVAL; +} + +static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, + struct ib_send_wr *ibwr) +{ + wr->wr_id = ibwr->wr_id; + wr->num_sge = ibwr->num_sge; + wr->opcode = ibwr->opcode; + wr->send_flags = ibwr->send_flags; + + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) { + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; + wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + if (qp_type(qp) == IB_QPT_GSI) + wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; + if (wr->opcode == IB_WR_SEND_WITH_IMM) + wr->ex.imm_data = ibwr->ex.imm_data; + } else { + switch (wr->opcode) { + case IB_WR_RDMA_WRITE_WITH_IMM: + wr->ex.imm_data = ibwr->ex.imm_data; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; + wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; + break; + case IB_WR_SEND_WITH_IMM: + wr->ex.imm_data = ibwr->ex.imm_data; + break; + case IB_WR_SEND_WITH_INV: + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + wr->wr.atomic.remote_addr = + atomic_wr(ibwr)->remote_addr; + wr->wr.atomic.compare_add = + atomic_wr(ibwr)->compare_add; + wr->wr.atomic.swap = atomic_wr(ibwr)->swap; + wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey; + break; + case IB_WR_LOCAL_INV: + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; + break; + case IB_WR_REG_MR: + wr->wr.reg.mr = reg_wr(ibwr)->mr; + wr->wr.reg.key = reg_wr(ibwr)->key; + wr->wr.reg.access = reg_wr(ibwr)->access; + break; + default: + break; + } + } +} + +static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length, + struct rxe_send_wqe *wqe) +{ + int num_sge = ibwr->num_sge; + struct ib_sge *sge; + int i; + u8 *p; + + init_send_wr(qp, &wqe->wr, ibwr); + + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) + memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); + + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { + p = wqe->dma.inline_data; + + sge = ibwr->sg_list; + for (i = 0; i < num_sge; i++, sge++) { + if (qp->is_user && copy_from_user(p, (__user void *) + (uintptr_t)sge->addr, sge->length)) + return -EFAULT; + + else if (!qp->is_user) + memcpy(p, (void *)(uintptr_t)sge->addr, + sge->length); + + p += sge->length; + } + } else if (mask & WR_REG_MASK) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return 0; + } else + memcpy(wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + atomic_wr(ibwr)->remote_addr : + rdma_wr(ibwr)->remote_addr; + wqe->mask = mask; + wqe->dma.length = length; + wqe->dma.resid = length; + wqe->dma.num_sge = num_sge; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + wqe->state = wqe_state_posted; + wqe->ssn = atomic_add_return(1, &qp->ssn); + + return 0; +} + +static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned mask, u32 length) +{ + int err; + struct rxe_sq *sq = &qp->sq; + struct rxe_send_wqe *send_wqe; + unsigned long flags; + + err = validate_send_wr(qp, ibwr, mask, length); + if (err) + return err; + + spin_lock_irqsave(&qp->sq.sq_lock, flags); + + if (unlikely(queue_full(sq->queue))) { + err = -ENOMEM; + goto err1; + } + + send_wqe = producer_addr(sq->queue); + + err = init_send_wqe(qp, ibwr, mask, length, send_wqe); + if (unlikely(err)) + goto err1; + + /* + * make sure all changes to the work queue are + * written before we update the producer pointer + */ + smp_wmb(); + + advance_producer(sq->queue); + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + + return 0; + +err1: + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + return err; +} + +static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + struct rxe_qp *qp = to_rqp(ibqp); + unsigned int mask; + unsigned int length = 0; + int i; + int must_sched; + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + mask = wr_opcode_mask(wr->opcode, qp); + if (unlikely(!mask)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely((wr->send_flags & IB_SEND_INLINE) && + !(mask & WR_INLINE_MASK))) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + length = 0; + for (i = 0; i < wr->num_sge; i++) + length += wr->sg_list[i].length; + + err = post_one_send(qp, wr, mask, length); + + if (err) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + + /* + * Must sched in case of GSI QP because ib_send_mad() hold irq lock, + * and the requester call ip_local_out_sk() that takes spin_lock_bh. + */ + must_sched = (qp_type(qp) == IB_QPT_GSI) || + (queue_count(qp->sq.queue) > 1); + + rxe_run_task(&qp->req.task, must_sched); + + return err; +} + +static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_rq *rq = &qp->rq; + unsigned long flags; + + if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + if (unlikely(qp->srq)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + spin_lock_irqsave(&rq->producer_lock, flags); + + while (wr) { + err = post_one_recv(rq, wr); + if (unlikely(err)) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + + spin_unlock_irqrestore(&rq->producer_lock, flags); + +err1: + return err; +} + +static struct ib_cq *rxe_create_cq(struct ib_device *dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_cq *cq; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata); + if (err) + goto err1; + + cq = rxe_alloc(&rxe->cq_pool); + if (!cq) { + err = -ENOMEM; + goto err1; + } + + err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, + context, udata); + if (err) + goto err2; + + return &cq->ibcq; + +err2: + rxe_drop_ref(cq); +err1: + return ERR_PTR(err); +} + +static int rxe_destroy_cq(struct ib_cq *ibcq) +{ + struct rxe_cq *cq = to_rcq(ibcq); + + rxe_drop_ref(cq); + return 0; +} + +static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + int err; + struct rxe_cq *cq = to_rcq(ibcq); + struct rxe_dev *rxe = to_rdev(ibcq->device); + + err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata); + if (err) + goto err1; + + err = rxe_cq_resize_queue(cq, cqe, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + int i; + struct rxe_cq *cq = to_rcq(ibcq); + struct rxe_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (i = 0; i < num_entries; i++) { + cqe = queue_head(cq->queue); + if (!cqe) + break; + + memcpy(wc++, &cqe->ibwc, sizeof(*wc)); + advance_consumer(cq->queue); + } + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return i; +} + +static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) +{ + struct rxe_cq *cq = to_rcq(ibcq); + int count = queue_count(cq->queue); + + return (count > wc_cnt) ? wc_cnt : count; +} + +static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct rxe_cq *cq = to_rcq(ibcq); + + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = flags & IB_CQ_SOLICITED_MASK; + + return 0; +} + +static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) +{ + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + int err; + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_dma(rxe, pd, access, mr); + if (err) + goto err2; + + return &mr->ibmr; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err1: + return ERR_PTR(err); +} + +static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, + u64 start, + u64 length, + u64 iova, + int access, struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err2; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_user(rxe, pd, start, length, iova, + access, udata, mr); + if (err) + goto err3; + + return &mr->ibmr; + +err3: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err2: + return ERR_PTR(err); +} + +static int rxe_dereg_mr(struct ib_mr *ibmr) +{ + struct rxe_mem *mr = to_rmr(ibmr); + + mr->state = RXE_MEM_STATE_ZOMBIE; + rxe_drop_ref(mr->pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); + return 0; +} + +static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + int err; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr); + if (err) + goto err2; + + return &mr->ibmr; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err1: + return ERR_PTR(err); +} + +static int rxe_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rxe_mem *mr = to_rmr(ibmr); + struct rxe_map *map; + struct rxe_phys_buf *buf; + + if (unlikely(mr->nbuf == mr->num_buf)) + return -ENOMEM; + + map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; + buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; + + buf->addr = addr; + buf->size = ibmr->page_size; + mr->nbuf++; + + return 0; +} + +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct rxe_mem *mr = to_rmr(ibmr); + int n; + + mr->nbuf = 0; + + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); + + mr->va = ibmr->iova; + mr->iova = ibmr->iova; + mr->length = ibmr->length; + mr->page_shift = ilog2(ibmr->page_size); + mr->page_mask = ibmr->page_size - 1; + mr->offset = mr->iova & mr->page_mask; + + return n; +} + +static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mc_grp *grp; + + /* takes a ref on grp if successful */ + err = rxe_mcast_get_grp(rxe, mgid, &grp); + if (err) + return err; + + err = rxe_mcast_add_grp_elem(rxe, qp, grp); + + rxe_drop_ref(grp); + return err; +} + +static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) +{ + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + + return rxe_mcast_drop_grp_elem(rxe, qp, mgid); +} + +static ssize_t rxe_show_parent(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct rxe_dev *rxe = container_of(device, struct rxe_dev, + ib_dev.dev); + char *name; + + name = rxe->ifc_ops->parent_name(rxe, 1); + return snprintf(buf, 16, "%s\n", name); +} + +static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL); + +static struct device_attribute *rxe_dev_attributes[] = { + &dev_attr_parent, +}; + +int rxe_register_device(struct rxe_dev *rxe) +{ + int err; + int i; + struct ib_device *dev = &rxe->ib_dev; + + strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); + strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + + dev->owner = THIS_MODULE; + dev->node_type = RDMA_NODE_IB_CA; + dev->phys_port_cnt = 1; + dev->num_comp_vectors = RXE_NUM_COMP_VECTORS; + dev->dma_device = rxe->ifc_ops->dma_device(rxe); + dev->local_dma_lkey = 0; + dev->node_guid = rxe->ifc_ops->node_guid(rxe); + dev->dma_ops = &rxe_dma_mapping_ops; + + dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; + dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) + | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) + | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) + | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) + | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) + | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) + ; + + dev->query_device = rxe_query_device; + dev->modify_device = rxe_modify_device; + dev->query_port = rxe_query_port; + dev->modify_port = rxe_modify_port; + dev->get_link_layer = rxe_get_link_layer; + dev->query_gid = rxe_query_gid; + dev->get_netdev = rxe_get_netdev; + dev->add_gid = rxe_add_gid; + dev->del_gid = rxe_del_gid; + dev->query_pkey = rxe_query_pkey; + dev->alloc_ucontext = rxe_alloc_ucontext; + dev->dealloc_ucontext = rxe_dealloc_ucontext; + dev->mmap = rxe_mmap; + dev->get_port_immutable = rxe_port_immutable; + dev->alloc_pd = rxe_alloc_pd; + dev->dealloc_pd = rxe_dealloc_pd; + dev->create_ah = rxe_create_ah; + dev->modify_ah = rxe_modify_ah; + dev->query_ah = rxe_query_ah; + dev->destroy_ah = rxe_destroy_ah; + dev->create_srq = rxe_create_srq; + dev->modify_srq = rxe_modify_srq; + dev->query_srq = rxe_query_srq; + dev->destroy_srq = rxe_destroy_srq; + dev->post_srq_recv = rxe_post_srq_recv; + dev->create_qp = rxe_create_qp; + dev->modify_qp = rxe_modify_qp; + dev->query_qp = rxe_query_qp; + dev->destroy_qp = rxe_destroy_qp; + dev->post_send = rxe_post_send; + dev->post_recv = rxe_post_recv; + dev->create_cq = rxe_create_cq; + dev->destroy_cq = rxe_destroy_cq; + dev->resize_cq = rxe_resize_cq; + dev->poll_cq = rxe_poll_cq; + dev->peek_cq = rxe_peek_cq; + dev->req_notify_cq = rxe_req_notify_cq; + dev->get_dma_mr = rxe_get_dma_mr; + dev->reg_user_mr = rxe_reg_user_mr; + dev->dereg_mr = rxe_dereg_mr; + dev->alloc_mr = rxe_alloc_mr; + dev->map_mr_sg = rxe_map_mr_sg; + dev->attach_mcast = rxe_attach_mcast; + dev->detach_mcast = rxe_detach_mcast; + + err = ib_register_device(dev, NULL); + if (err) { + pr_warn("rxe_register_device failed, err = %d\n", err); + goto err1; + } + + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { + err = device_create_file(&dev->dev, rxe_dev_attributes[i]); + if (err) { + pr_warn("device_create_file failed, i = %d, err = %d\n", + i, err); + goto err2; + } + } + + return 0; + +err2: + ib_unregister_device(dev); +err1: + return err; +} + +int rxe_unregister_device(struct rxe_dev *rxe) +{ + int i; + struct ib_device *dev = &rxe->ib_dev; + + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) + device_remove_file(&dev->dev, rxe_dev_attributes[i]); + + ib_unregister_device(dev); + + return 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h new file mode 100644 index 000000000000..cac1d52a08f0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_VERBS_H +#define RXE_VERBS_H + +#include <linux/interrupt.h> +#include <rdma/rdma_user_rxe.h> +#include "rxe_pool.h" +#include "rxe_task.h" + +static inline int pkey_match(u16 key1, u16 key2) +{ + return (((key1 & 0x7fff) != 0) && + ((key1 & 0x7fff) == (key2 & 0x7fff)) && + ((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0; +} + +/* Return >0 if psn_a > psn_b + * 0 if psn_a == psn_b + * <0 if psn_a < psn_b + */ +static inline int psn_compare(u32 psn_a, u32 psn_b) +{ + s32 diff; + + diff = (psn_a - psn_b) << 8; + return diff; +} + +struct rxe_ucontext { + struct rxe_pool_entry pelem; + struct ib_ucontext ibuc; +}; + +struct rxe_pd { + struct rxe_pool_entry pelem; + struct ib_pd ibpd; +}; + +struct rxe_ah { + struct rxe_pool_entry pelem; + struct ib_ah ibah; + struct rxe_pd *pd; + struct rxe_av av; +}; + +struct rxe_cqe { + union { + struct ib_wc ibwc; + struct ib_uverbs_wc uibwc; + }; +}; + +struct rxe_cq { + struct rxe_pool_entry pelem; + struct ib_cq ibcq; + struct rxe_queue *queue; + spinlock_t cq_lock; + u8 notify; + int is_user; + struct tasklet_struct comp_task; +}; + +enum wqe_state { + wqe_state_posted, + wqe_state_processing, + wqe_state_pending, + wqe_state_done, + wqe_state_error, +}; + +struct rxe_sq { + int max_wr; + int max_sge; + int max_inline; + spinlock_t sq_lock; /* guard queue */ + struct rxe_queue *queue; +}; + +struct rxe_rq { + int max_wr; + int max_sge; + spinlock_t producer_lock; /* guard queue producer */ + spinlock_t consumer_lock; /* guard queue consumer */ + struct rxe_queue *queue; +}; + +struct rxe_srq { + struct rxe_pool_entry pelem; + struct ib_srq ibsrq; + struct rxe_pd *pd; + struct rxe_rq rq; + u32 srq_num; + + int limit; + int error; +}; + +enum rxe_qp_state { + QP_STATE_RESET, + QP_STATE_INIT, + QP_STATE_READY, + QP_STATE_DRAIN, /* req only */ + QP_STATE_DRAINED, /* req only */ + QP_STATE_ERROR +}; + +extern char *rxe_qp_state_name[]; + +struct rxe_req_info { + enum rxe_qp_state state; + int wqe_index; + u32 psn; + int opcode; + atomic_t rd_atomic; + int wait_fence; + int need_rd_atomic; + int wait_psn; + int need_retry; + int noack_pkts; + struct rxe_task task; +}; + +struct rxe_comp_info { + u32 psn; + int opcode; + int timeout; + int timeout_retry; + u32 retry_cnt; + u32 rnr_retry; + struct rxe_task task; +}; + +enum rdatm_res_state { + rdatm_res_state_next, + rdatm_res_state_new, + rdatm_res_state_replay, +}; + +struct resp_res { + int type; + u32 first_psn; + u32 last_psn; + u32 cur_psn; + enum rdatm_res_state state; + + union { + struct { + struct sk_buff *skb; + } atomic; + struct { + struct rxe_mem *mr; + u64 va_org; + u32 rkey; + u32 length; + u64 va; + u32 resid; + } read; + }; +}; + +struct rxe_resp_info { + enum rxe_qp_state state; + u32 msn; + u32 psn; + int opcode; + int drop_msg; + int goto_error; + int sent_psn_nak; + enum ib_wc_status status; + u8 aeth_syndrome; + + /* Receive only */ + struct rxe_recv_wqe *wqe; + + /* RDMA read / atomic only */ + u64 va; + struct rxe_mem *mr; + u32 resid; + u32 rkey; + u64 atomic_orig; + + /* SRQ only */ + struct { + struct rxe_recv_wqe wqe; + struct ib_sge sge[RXE_MAX_SGE]; + } srq_wqe; + + /* Responder resources. It's a circular list where the oldest + * resource is dropped first. + */ + struct resp_res *resources; + unsigned int res_head; + unsigned int res_tail; + struct resp_res *res; + struct rxe_task task; +}; + +struct rxe_qp { + struct rxe_pool_entry pelem; + struct ib_qp ibqp; + struct ib_qp_attr attr; + unsigned int valid; + unsigned int mtu; + int is_user; + + struct rxe_pd *pd; + struct rxe_srq *srq; + struct rxe_cq *scq; + struct rxe_cq *rcq; + + enum ib_sig_type sq_sig_type; + + struct rxe_sq sq; + struct rxe_rq rq; + + struct socket *sk; + + struct rxe_av pri_av; + struct rxe_av alt_av; + + /* list of mcast groups qp has joined (for cleanup) */ + struct list_head grp_list; + spinlock_t grp_lock; /* guard grp_list */ + + struct sk_buff_head req_pkts; + struct sk_buff_head resp_pkts; + struct sk_buff_head send_pkts; + + struct rxe_req_info req; + struct rxe_comp_info comp; + struct rxe_resp_info resp; + + atomic_t ssn; + atomic_t skb_out; + int need_req_skb; + + /* Timer for retranmitting packet when ACKs have been lost. RC + * only. The requester sets it when it is not already + * started. The responder resets it whenever an ack is + * received. + */ + struct timer_list retrans_timer; + u64 qp_timeout_jiffies; + + /* Timer for handling RNR NAKS. */ + struct timer_list rnr_nak_timer; + + spinlock_t state_lock; /* guard requester and completer */ +}; + +enum rxe_mem_state { + RXE_MEM_STATE_ZOMBIE, + RXE_MEM_STATE_INVALID, + RXE_MEM_STATE_FREE, + RXE_MEM_STATE_VALID, +}; + +enum rxe_mem_type { + RXE_MEM_TYPE_NONE, + RXE_MEM_TYPE_DMA, + RXE_MEM_TYPE_MR, + RXE_MEM_TYPE_FMR, + RXE_MEM_TYPE_MW, +}; + +#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf)) + +struct rxe_phys_buf { + u64 addr; + u64 size; +}; + +struct rxe_map { + struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; +}; + +struct rxe_mem { + struct rxe_pool_entry pelem; + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + }; + + struct rxe_pd *pd; + struct ib_umem *umem; + + u32 lkey; + u32 rkey; + + enum rxe_mem_state state; + enum rxe_mem_type type; + u64 va; + u64 iova; + size_t length; + u32 offset; + int access; + + int page_shift; + int page_mask; + int map_shift; + int map_mask; + + u32 num_buf; + u32 nbuf; + + u32 max_buf; + u32 num_map; + + struct rxe_map **map; +}; + +struct rxe_mc_grp { + struct rxe_pool_entry pelem; + spinlock_t mcg_lock; /* guard group */ + struct rxe_dev *rxe; + struct list_head qp_list; + union ib_gid mgid; + int num_qp; + u32 qkey; + u16 pkey; +}; + +struct rxe_mc_elem { + struct rxe_pool_entry pelem; + struct list_head qp_list; + struct list_head grp_list; + struct rxe_qp *qp; + struct rxe_mc_grp *grp; +}; + +struct rxe_port { + struct ib_port_attr attr; + u16 *pkey_tbl; + __be64 port_guid; + __be64 subnet_prefix; + spinlock_t port_lock; /* guard port */ + unsigned int mtu_cap; + /* special QPs */ + u32 qp_smi_index; + u32 qp_gsi_index; +}; + +/* callbacks from rdma_rxe to network interface layer */ +struct rxe_ifc_ops { + void (*release)(struct rxe_dev *rxe); + __be64 (*node_guid)(struct rxe_dev *rxe); + __be64 (*port_guid)(struct rxe_dev *rxe); + struct device *(*dma_device)(struct rxe_dev *rxe); + int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid); + int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid); + int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc); + int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb); + int (*loopback)(struct sk_buff *skb); + struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt); + char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num); + enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe, + unsigned int port_num); +}; + +struct rxe_dev { + struct ib_device ib_dev; + struct ib_device_attr attr; + int max_ucontext; + int max_inline_data; + struct kref ref_cnt; + struct mutex usdev_lock; + + struct rxe_ifc_ops *ifc_ops; + + struct net_device *ndev; + + int xmit_errors; + + struct rxe_pool uc_pool; + struct rxe_pool pd_pool; + struct rxe_pool ah_pool; + struct rxe_pool srq_pool; + struct rxe_pool qp_pool; + struct rxe_pool cq_pool; + struct rxe_pool mr_pool; + struct rxe_pool mw_pool; + struct rxe_pool mc_grp_pool; + struct rxe_pool mc_elem_pool; + + spinlock_t pending_lock; /* guard pending_mmaps */ + struct list_head pending_mmaps; + + spinlock_t mmap_offset_lock; /* guard mmap_offset */ + int mmap_offset; + + struct rxe_port port; + struct list_head list; +}; + +static inline struct rxe_dev *to_rdev(struct ib_device *dev) +{ + return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL; +} + +static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc) +{ + return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL; +} + +static inline struct rxe_pd *to_rpd(struct ib_pd *pd) +{ + return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL; +} + +static inline struct rxe_ah *to_rah(struct ib_ah *ah) +{ + return ah ? container_of(ah, struct rxe_ah, ibah) : NULL; +} + +static inline struct rxe_srq *to_rsrq(struct ib_srq *srq) +{ + return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL; +} + +static inline struct rxe_qp *to_rqp(struct ib_qp *qp) +{ + return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL; +} + +static inline struct rxe_cq *to_rcq(struct ib_cq *cq) +{ + return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL; +} + +static inline struct rxe_mem *to_rmr(struct ib_mr *mr) +{ + return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL; +} + +static inline struct rxe_mem *to_rmw(struct ib_mw *mw) +{ + return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL; +} + +int rxe_register_device(struct rxe_dev *rxe); +int rxe_unregister_device(struct rxe_dev *rxe); + +void rxe_mc_cleanup(void *arg); + +#endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 1502199c8e56..7b6d40ff1acf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -62,10 +62,8 @@ static void ipoib_get_drvinfo(struct net_device *netdev, { struct ipoib_dev_priv *priv = netdev_priv(netdev); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32), - (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff, - (int)priv->ca->attrs.fw_ver & 0xffff); + ib_get_device_fw_str(priv->ca, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), sizeof(drvinfo->bus_info)); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5f58c41ef787..74bcaa064226 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1967,8 +1967,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) priv->hca_caps = hca->attrs.device_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features = NETIF_F_SG | - NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; if (priv->hca_caps & IB_DEVICE_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 1e7cbbaa15bd..c55ecb2c3736 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -135,7 +135,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = 1, + .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + MAX_SKB_FRAGS + 1), .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, @@ -205,10 +206,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; - if (dev->features & NETIF_F_SG) - init_attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); - priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); @@ -234,6 +231,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; + if (init_attr.cap.max_send_sge > 1) + dev->features |= NETIF_F_SG; + priv->max_send_sge = init_attr.cap.max_send_sge; return 0; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a990c04208c9..ba6be060a476 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -137,8 +137,6 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; attr.cap.max_send_sge = device->ib_device->attrs.max_sge; - isert_conn->max_sge = min(device->ib_device->attrs.max_sge, - device->ib_device->attrs.max_sge_rd); attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index e512ba941f2f..fc791efe3a10 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -138,7 +138,6 @@ struct isert_conn { u32 responder_resources; u32 initiator_depth; bool pi_support; - u32 max_sge; struct iser_rx_desc *login_req_buf; char *login_rsp_buf; u64 login_req_dma; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4a4155640d51..dfa23b075a88 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1601,6 +1601,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct ib_qp_init_attr *qp_init; struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; + const struct ib_device_attr *attrs = &sdev->device->attrs; u32 srp_sq_size = sport->port_attrib.srp_sq_size; int ret; @@ -1638,7 +1639,7 @@ retry: */ qp_init->cap.max_send_wr = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; - qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; + qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; ch->qp = ib_create_qp(sdev->pd, qp_init); @@ -2261,7 +2262,7 @@ static void srpt_queue_response(struct se_cmd *cmd) container_of(cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; struct srpt_device *sdev = ch->sport->sdev; - struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr; + struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; struct ib_sge sge; enum srpt_command_state state; unsigned long flags; @@ -2302,11 +2303,8 @@ static void srpt_queue_response(struct se_cmd *cmd) struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, - ch->sport->port, NULL, - first_wr ? first_wr : &send_wr); + ch->sport->port, NULL, first_wr); } - } else { - first_wr = &send_wr; } if (state != SRPT_STATE_MGMT) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 389030487da7..581878782854 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -106,7 +106,11 @@ enum { SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, SRPT_DEF_SG_TABLESIZE = 128, - SRPT_DEF_SG_PER_WQE = 16, + /* + * An experimentally determined value that avoids that QP creation + * fails due to "swiotlb buffer is full" on systems using the swiotlb. + */ + SRPT_MAX_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index a529a4535457..83af17ad0f1f 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -115,6 +115,10 @@ static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); +static bool auto_poweroff = true; +module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); + static const struct xpad_device { u16 idVendor; u16 idProduct; @@ -1248,6 +1252,36 @@ static void xpad_stop_input(struct usb_xpad *xpad) usb_kill_urb(xpad->irq_in); } +static void xpad360w_poweroff_controller(struct usb_xpad *xpad) +{ + unsigned long flags; + struct xpad_output_packet *packet = + &xpad->out_packets[XPAD_OUT_CMD_IDX]; + + spin_lock_irqsave(&xpad->odata_lock, flags); + + packet->data[0] = 0x00; + packet->data[1] = 0x00; + packet->data[2] = 0x08; + packet->data[3] = 0xC0; + packet->data[4] = 0x00; + packet->data[5] = 0x00; + packet->data[6] = 0x00; + packet->data[7] = 0x00; + packet->data[8] = 0x00; + packet->data[9] = 0x00; + packet->data[10] = 0x00; + packet->data[11] = 0x00; + packet->len = 12; + packet->pending = true; + + /* Reset the sequence so we send out poweroff now */ + xpad->last_out_packet = -1; + xpad_try_sending_next_out_packet(xpad); + + spin_unlock_irqrestore(&xpad->odata_lock, flags); +} + static int xpad360w_start_input(struct usb_xpad *xpad) { int error; @@ -1590,6 +1624,15 @@ static int xpad_suspend(struct usb_interface *intf, pm_message_t message) * or goes away. */ xpad360w_stop_input(xpad); + + /* + * The wireless adapter is going off now, so the + * gamepads are going to become disconnected. + * Unless explicitly disabled, power them down + * so they don't just sit there flashing. + */ + if (auto_poweroff && xpad->pad_present) + xpad360w_poweroff_controller(xpad); } else { mutex_lock(&input->mutex); if (input->users) diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index b01966dc7eb3..4b0878f35471 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -186,7 +186,7 @@ static irqreturn_t cros_ec_keyb_irq(int irq, void *data) if (ret >= 0) cros_ec_keyb_process(ckdev, kb_state, ret); else - dev_err(ec->dev, "failed to get keyboard state: %d\n", ret); + dev_err(ckdev->dev, "failed to get keyboard state: %d\n", ret); return IRQ_HANDLED; } @@ -236,7 +236,7 @@ static void cros_ec_keyb_compute_valid_keys(struct cros_ec_keyb *ckdev) static int cros_ec_keyb_probe(struct platform_device *pdev) { struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); - struct device *dev = ec->dev; + struct device *dev = &pdev->dev; struct cros_ec_keyb *ckdev; struct input_dev *idev; struct device_node *np; @@ -246,23 +246,22 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) if (!np) return -ENODEV; - ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL); + ckdev = devm_kzalloc(dev, sizeof(*ckdev), GFP_KERNEL); if (!ckdev) return -ENOMEM; - err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows, - &ckdev->cols); + err = matrix_keypad_parse_of_params(dev, &ckdev->rows, &ckdev->cols); if (err) return err; - ckdev->valid_keys = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL); + ckdev->valid_keys = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL); if (!ckdev->valid_keys) return -ENOMEM; - ckdev->old_kb_state = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL); + ckdev->old_kb_state = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL); if (!ckdev->old_kb_state) return -ENOMEM; - idev = devm_input_allocate_device(&pdev->dev); + idev = devm_input_allocate_device(dev); if (!idev) return -ENOMEM; @@ -273,7 +272,7 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) ckdev->ec = ec; ckdev->dev = dev; - dev_set_drvdata(&pdev->dev, ckdev); + dev_set_drvdata(dev, ckdev); idev->name = CROS_EC_DEV_NAME; idev->phys = ec->phys_name; @@ -282,7 +281,7 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) idev->id.bustype = BUS_VIRTUAL; idev->id.version = 1; idev->id.product = 0; - idev->dev.parent = &pdev->dev; + idev->dev.parent = dev; idev->open = cros_ec_keyb_open; idev->close = cros_ec_keyb_close; diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index c7fc8d4fb080..1588aecafff7 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -28,6 +28,11 @@ #define DRV_NAME "rotary-encoder" +enum rotary_encoder_encoding { + ROTENC_GRAY, + ROTENC_BINARY, +}; + struct rotary_encoder { struct input_dev *input; @@ -37,6 +42,7 @@ struct rotary_encoder { u32 axis; bool relative_axis; bool rollover; + enum rotary_encoder_encoding encoding; unsigned int pos; @@ -57,8 +63,9 @@ static unsigned int rotary_encoder_get_state(struct rotary_encoder *encoder) for (i = 0; i < encoder->gpios->ndescs; ++i) { int val = gpiod_get_value_cansleep(encoder->gpios->desc[i]); + /* convert from gray encoding to normal */ - if (ret & 1) + if (encoder->encoding == ROTENC_GRAY && ret & 1) val = !val; ret = ret << 1 | val; @@ -213,6 +220,20 @@ static int rotary_encoder_probe(struct platform_device *pdev) encoder->rollover = device_property_read_bool(dev, "rotary-encoder,rollover"); + if (!device_property_present(dev, "rotary-encoder,encoding") || + !device_property_match_string(dev, "rotary-encoder,encoding", + "gray")) { + dev_info(dev, "gray"); + encoder->encoding = ROTENC_GRAY; + } else if (!device_property_match_string(dev, "rotary-encoder,encoding", + "binary")) { + dev_info(dev, "binary"); + encoder->encoding = ROTENC_BINARY; + } else { + dev_err(dev, "unknown encoding setting\n"); + return -EINVAL; + } + device_property_read_u32(dev, "linux,axis", &encoder->axis); encoder->relative_axis = device_property_read_bool(dev, "rotary-encoder,relative-axis"); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 2f589857a039..d15b33813021 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -4,7 +4,8 @@ * Copyright (c) 2013 ELAN Microelectronics Corp. * * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw> - * Version: 1.6.0 + * Author: KT Liao <kt.liao@emc.com.tw> + * Version: 1.6.2 * * Based on cyapa driver: * copyright (c) 2011-2012 Cypress Semiconductor, Inc. @@ -40,7 +41,7 @@ #include "elan_i2c.h" #define DRIVER_NAME "elan_i2c" -#define ELAN_DRIVER_VERSION "1.6.1" +#define ELAN_DRIVER_VERSION "1.6.2" #define ELAN_VENDOR_ID 0x04f3 #define ETP_MAX_PRESSURE 255 #define ETP_FWIDTH_REDUCE 90 @@ -199,9 +200,41 @@ static int elan_sleep(struct elan_tp_data *data) return error; } +static int elan_query_product(struct elan_tp_data *data) +{ + int error; + + error = data->ops->get_product_id(data->client, &data->product_id); + if (error) + return error; + + error = data->ops->get_sm_version(data->client, &data->ic_type, + &data->sm_version); + if (error) + return error; + + return 0; +} + +static int elan_check_ASUS_special_fw(struct elan_tp_data *data) +{ + if (data->ic_type != 0x0E) + return false; + + switch (data->product_id) { + case 0x05 ... 0x07: + case 0x09: + case 0x13: + return true; + default: + return false; + } +} + static int __elan_initialize(struct elan_tp_data *data) { struct i2c_client *client = data->client; + bool woken_up = false; int error; error = data->ops->initialize(client); @@ -210,6 +243,27 @@ static int __elan_initialize(struct elan_tp_data *data) return error; } + error = elan_query_product(data); + if (error) + return error; + + /* + * Some ASUS devices were shipped with firmware that requires + * touchpads to be woken up first, before attempting to switch + * them into absolute reporting mode. + */ + if (elan_check_ASUS_special_fw(data)) { + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } + + msleep(200); + woken_up = true; + } + data->mode |= ETP_ENABLE_ABS; error = data->ops->set_mode(client, data->mode); if (error) { @@ -218,11 +272,13 @@ static int __elan_initialize(struct elan_tp_data *data) return error; } - error = data->ops->sleep_control(client, false); - if (error) { - dev_err(&client->dev, - "failed to wake device up: %d\n", error); - return error; + if (!woken_up) { + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } } return 0; @@ -248,10 +304,6 @@ static int elan_query_device_info(struct elan_tp_data *data) { int error; - error = data->ops->get_product_id(data->client, &data->product_id); - if (error) - return error; - error = data->ops->get_version(data->client, false, &data->fw_version); if (error) return error; @@ -261,11 +313,6 @@ static int elan_query_device_info(struct elan_tp_data *data) if (error) return error; - error = data->ops->get_sm_version(data->client, &data->ic_type, - &data->sm_version); - if (error) - return error; - error = data->ops->get_version(data->client, true, &data->iap_version); if (error) return error; diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 615d23ec0d8e..08e252a42480 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -222,12 +222,8 @@ static int elantech_write_reg(struct psmouse *psmouse, unsigned char reg, */ static void elantech_packet_dump(struct psmouse *psmouse) { - int i; - - psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet ["); - for (i = 0; i < psmouse->pktsize; i++) - printk("%s0x%02x ", i ? ", " : " ", psmouse->packet[i]); - printk("]\n"); + psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet [%*ph]\n", + psmouse->pktsize, psmouse->packet); } /* diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 253df96be427..a73580654c6b 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -232,10 +232,7 @@ err_put_device: void rmi_unregister_function(struct rmi_function *fn) { device_del(&fn->dev); - - if (fn->dev.of_node) - of_node_put(fn->dev.of_node); - + of_node_put(fn->dev.of_node); put_device(&fn->dev); } diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 454195709a82..b4d34086e73f 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1277,6 +1277,7 @@ static int __init i8042_create_kbd_port(void) serio->start = i8042_start; serio->stop = i8042_stop; serio->close = i8042_port_close; + serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; strlcpy(serio->name, "i8042 KBD port", sizeof(serio->name)); @@ -1373,21 +1374,6 @@ static void i8042_unregister_ports(void) } } -/* - * Checks whether port belongs to i8042 controller. - */ -bool i8042_check_port_owner(const struct serio *port) -{ - int i; - - for (i = 0; i < I8042_NUM_PORTS; i++) - if (i8042_ports[i].serio == port) - return true; - - return false; -} -EXPORT_SYMBOL(i8042_check_port_owner); - static void i8042_free_irqs(void) { if (i8042_aux_irq_registered) diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 316f2c897101..83e9c663aa67 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -56,19 +56,17 @@ EXPORT_SYMBOL(ps2_sendbyte); void ps2_begin_command(struct ps2dev *ps2dev) { - mutex_lock(&ps2dev->cmd_mutex); + struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; - if (i8042_check_port_owner(ps2dev->serio)) - i8042_lock_chip(); + mutex_lock(m); } EXPORT_SYMBOL(ps2_begin_command); void ps2_end_command(struct ps2dev *ps2dev) { - if (i8042_check_port_owner(ps2dev->serio)) - i8042_unlock_chip(); + struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; - mutex_unlock(&ps2dev->cmd_mutex); + mutex_unlock(m); } EXPORT_SYMBOL(ps2_end_command); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index ee02dc7422bd..2fb1f430a431 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -1059,6 +1059,31 @@ config TOUCHSCREEN_RM_TS To compile this driver as a module, choose M here: the module will be called raydium_i2c_ts. +config TOUCHSCREEN_SILEAD + tristate "Silead I2C touchscreen" + depends on I2C + help + Say Y here if you have the Silead touchscreen connected to + your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called silead. + +config TOUCHSCREEN_SIS_I2C + tristate "SiS 9200 family I2C touchscreen" + depends on I2C + select CRC_ITU_T + depends on GPIOLIB || COMPILE_TEST + help + This enables support for SiS 9200 family over I2C based touchscreens. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called sis_i2c. + config TOUCHSCREEN_ST1232 tristate "Sitronix ST1232 touchscreen controllers" depends on I2C diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 3315882905f7..b4373d6be402 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -64,6 +64,8 @@ obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o obj-$(CONFIG_TOUCHSCREEN_PIXCIR) += pixcir_i2c_ts.o obj-$(CONFIG_TOUCHSCREEN_RM_TS) += raydium_i2c_ts.o obj-$(CONFIG_TOUCHSCREEN_S3C2410) += s3c2410_ts.o +obj-$(CONFIG_TOUCHSCREEN_SILEAD) += silead.o +obj-$(CONFIG_TOUCHSCREEN_SIS_I2C) += sis_i2c.o obj-$(CONFIG_TOUCHSCREEN_ST1232) += st1232.o obj-$(CONFIG_TOUCHSCREEN_STMPE) += stmpe-ts.o obj-$(CONFIG_TOUCHSCREEN_SUN4I) += sun4i-ts.o diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index ddf694b9fffc..fe4848bd1f4c 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -169,7 +169,7 @@ static ssize_t ili210x_calibrate(struct device *dev, return count; } -static DEVICE_ATTR(calibrate, 0644, NULL, ili210x_calibrate); +static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate); static struct attribute *ili210x_attributes[] = { &dev_attr_calibrate.attr, diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c new file mode 100644 index 000000000000..7379fe153cf9 --- /dev/null +++ b/drivers/input/touchscreen/silead.c @@ -0,0 +1,565 @@ +/* ------------------------------------------------------------------------- + * Copyright (C) 2014-2015, Intel Corporation + * + * Derived from: + * gslX68X.c + * Copyright (C) 2010-2015, Shanghai Sileadinc Co.Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * ------------------------------------------------------------------------- + */ + +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/acpi.h> +#include <linux/interrupt.h> +#include <linux/gpio/consumer.h> +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/input/touchscreen.h> +#include <linux/pm.h> +#include <linux/irq.h> + +#include <asm/unaligned.h> + +#define SILEAD_TS_NAME "silead_ts" + +#define SILEAD_REG_RESET 0xE0 +#define SILEAD_REG_DATA 0x80 +#define SILEAD_REG_TOUCH_NR 0x80 +#define SILEAD_REG_POWER 0xBC +#define SILEAD_REG_CLOCK 0xE4 +#define SILEAD_REG_STATUS 0xB0 +#define SILEAD_REG_ID 0xFC +#define SILEAD_REG_MEM_CHECK 0xB0 + +#define SILEAD_STATUS_OK 0x5A5A5A5A +#define SILEAD_TS_DATA_LEN 44 +#define SILEAD_CLOCK 0x04 + +#define SILEAD_CMD_RESET 0x88 +#define SILEAD_CMD_START 0x00 + +#define SILEAD_POINT_DATA_LEN 0x04 +#define SILEAD_POINT_Y_OFF 0x00 +#define SILEAD_POINT_Y_MSB_OFF 0x01 +#define SILEAD_POINT_X_OFF 0x02 +#define SILEAD_POINT_X_MSB_OFF 0x03 +#define SILEAD_TOUCH_ID_MASK 0xF0 + +#define SILEAD_CMD_SLEEP_MIN 10000 +#define SILEAD_CMD_SLEEP_MAX 20000 +#define SILEAD_POWER_SLEEP 20 +#define SILEAD_STARTUP_SLEEP 30 + +#define SILEAD_MAX_FINGERS 10 + +enum silead_ts_power { + SILEAD_POWER_ON = 1, + SILEAD_POWER_OFF = 0 +}; + +struct silead_ts_data { + struct i2c_client *client; + struct gpio_desc *gpio_power; + struct input_dev *input; + char fw_name[64]; + struct touchscreen_properties prop; + u32 max_fingers; + u32 chip_id; + struct input_mt_pos pos[SILEAD_MAX_FINGERS]; + int slots[SILEAD_MAX_FINGERS]; + int id[SILEAD_MAX_FINGERS]; +}; + +struct silead_fw_data { + u32 offset; + u32 val; +}; + +static int silead_ts_request_input_dev(struct silead_ts_data *data) +{ + struct device *dev = &data->client->dev; + int error; + + data->input = devm_input_allocate_device(dev); + if (!data->input) { + dev_err(dev, + "Failed to allocate input device\n"); + return -ENOMEM; + } + + input_set_abs_params(data->input, ABS_MT_POSITION_X, 0, 4095, 0, 0); + input_set_abs_params(data->input, ABS_MT_POSITION_Y, 0, 4095, 0, 0); + touchscreen_parse_properties(data->input, true, &data->prop); + + input_mt_init_slots(data->input, data->max_fingers, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED | + INPUT_MT_TRACK); + + data->input->name = SILEAD_TS_NAME; + data->input->phys = "input/ts"; + data->input->id.bustype = BUS_I2C; + + error = input_register_device(data->input); + if (error) { + dev_err(dev, "Failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +static void silead_ts_set_power(struct i2c_client *client, + enum silead_ts_power state) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + + if (data->gpio_power) { + gpiod_set_value_cansleep(data->gpio_power, state); + msleep(SILEAD_POWER_SLEEP); + } +} + +static void silead_ts_read_data(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + struct input_dev *input = data->input; + struct device *dev = &client->dev; + u8 *bufp, buf[SILEAD_TS_DATA_LEN]; + int touch_nr, error, i; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_DATA, + SILEAD_TS_DATA_LEN, buf); + if (error < 0) { + dev_err(dev, "Data read error %d\n", error); + return; + } + + touch_nr = buf[0]; + if (touch_nr > data->max_fingers) { + dev_warn(dev, "More touches reported then supported %d > %d\n", + touch_nr, data->max_fingers); + touch_nr = data->max_fingers; + } + + bufp = buf + SILEAD_POINT_DATA_LEN; + for (i = 0; i < touch_nr; i++, bufp += SILEAD_POINT_DATA_LEN) { + /* Bits 4-7 are the touch id */ + data->id[i] = (bufp[SILEAD_POINT_X_MSB_OFF] & + SILEAD_TOUCH_ID_MASK) >> 4; + touchscreen_set_mt_pos(&data->pos[i], &data->prop, + get_unaligned_le16(&bufp[SILEAD_POINT_X_OFF]) & 0xfff, + get_unaligned_le16(&bufp[SILEAD_POINT_Y_OFF]) & 0xfff); + } + + input_mt_assign_slots(input, data->slots, data->pos, touch_nr, 0); + + for (i = 0; i < touch_nr; i++) { + input_mt_slot(input, data->slots[i]); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_report_abs(input, ABS_MT_POSITION_X, data->pos[i].x); + input_report_abs(input, ABS_MT_POSITION_Y, data->pos[i].y); + + dev_dbg(dev, "x=%d y=%d hw_id=%d sw_id=%d\n", data->pos[i].x, + data->pos[i].y, data->id[i], data->slots[i]); + } + + input_mt_sync_frame(input); + input_sync(input); +} + +static int silead_ts_init(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_RESET); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_TOUCH_NR, + data->max_fingers); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK, + SILEAD_CLOCK); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_START); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + return 0; + +i2c_write_err: + dev_err(&client->dev, "Registers clear error %d\n", error); + return error; +} + +static int silead_ts_reset(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_RESET); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK, + SILEAD_CLOCK); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_POWER, + SILEAD_CMD_START); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + return 0; + +i2c_write_err: + dev_err(&client->dev, "Chip reset error %d\n", error); + return error; +} + +static int silead_ts_startup(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, 0x00); + if (error) { + dev_err(&client->dev, "Startup error %d\n", error); + return error; + } + + msleep(SILEAD_STARTUP_SLEEP); + + return 0; +} + +static int silead_ts_load_fw(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct silead_ts_data *data = i2c_get_clientdata(client); + unsigned int fw_size, i; + const struct firmware *fw; + struct silead_fw_data *fw_data; + int error; + + dev_dbg(dev, "Firmware file name: %s", data->fw_name); + + error = request_firmware(&fw, data->fw_name, dev); + if (error) { + dev_err(dev, "Firmware request error %d\n", error); + return error; + } + + fw_size = fw->size / sizeof(*fw_data); + fw_data = (struct silead_fw_data *)fw->data; + + for (i = 0; i < fw_size; i++) { + error = i2c_smbus_write_i2c_block_data(client, + fw_data[i].offset, + 4, + (u8 *)&fw_data[i].val); + if (error) { + dev_err(dev, "Firmware load error %d\n", error); + break; + } + } + + release_firmware(fw); + return error ?: 0; +} + +static u32 silead_ts_get_status(struct i2c_client *client) +{ + int error; + __le32 status; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_STATUS, + sizeof(status), (u8 *)&status); + if (error < 0) { + dev_err(&client->dev, "Status read error %d\n", error); + return error; + } + + return le32_to_cpu(status); +} + +static int silead_ts_get_id(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + __le32 chip_id; + int error; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_ID, + sizeof(chip_id), (u8 *)&chip_id); + if (error < 0) { + dev_err(&client->dev, "Chip ID read error %d\n", error); + return error; + } + + data->chip_id = le32_to_cpu(chip_id); + dev_info(&client->dev, "Silead chip ID: 0x%8X", data->chip_id); + + return 0; +} + +static int silead_ts_setup(struct i2c_client *client) +{ + int error; + u32 status; + + silead_ts_set_power(client, SILEAD_POWER_OFF); + silead_ts_set_power(client, SILEAD_POWER_ON); + + error = silead_ts_get_id(client); + if (error) + return error; + + error = silead_ts_init(client); + if (error) + return error; + + error = silead_ts_reset(client); + if (error) + return error; + + error = silead_ts_load_fw(client); + if (error) + return error; + + error = silead_ts_startup(client); + if (error) + return error; + + status = silead_ts_get_status(client); + if (status != SILEAD_STATUS_OK) { + dev_err(&client->dev, + "Initialization error, status: 0x%X\n", status); + return -ENODEV; + } + + return 0; +} + +static irqreturn_t silead_ts_threaded_irq_handler(int irq, void *id) +{ + struct silead_ts_data *data = id; + struct i2c_client *client = data->client; + + silead_ts_read_data(client); + + return IRQ_HANDLED; +} + +static void silead_ts_read_props(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + struct device *dev = &client->dev; + const char *str; + int error; + + error = device_property_read_u32(dev, "silead,max-fingers", + &data->max_fingers); + if (error) { + dev_dbg(dev, "Max fingers read error %d\n", error); + data->max_fingers = 5; /* Most devices handle up-to 5 fingers */ + } + + error = device_property_read_string(dev, "touchscreen-fw-name", &str); + if (!error) + snprintf(data->fw_name, sizeof(data->fw_name), "%s", str); + else + dev_dbg(dev, "Firmware file name read error. Using default."); +} + +#ifdef CONFIG_ACPI +static int silead_ts_set_default_fw_name(struct silead_ts_data *data, + const struct i2c_device_id *id) +{ + const struct acpi_device_id *acpi_id; + struct device *dev = &data->client->dev; + int i; + + if (ACPI_HANDLE(dev)) { + acpi_id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!acpi_id) + return -ENODEV; + + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", + acpi_id->id); + + for (i = 0; i < strlen(data->fw_name); i++) + data->fw_name[i] = tolower(data->fw_name[i]); + } else { + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", + id->name); + } + + return 0; +} +#else +static int silead_ts_set_default_fw_name(struct silead_ts_data *data, + const struct i2c_device_id *id) +{ + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name); + return 0; +} +#endif + +static int silead_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct silead_ts_data *data; + struct device *dev = &client->dev; + int error; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_I2C | + I2C_FUNC_SMBUS_READ_I2C_BLOCK | + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + dev_err(dev, "I2C functionality check failed\n"); + return -ENXIO; + } + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + i2c_set_clientdata(client, data); + data->client = client; + + error = silead_ts_set_default_fw_name(data, id); + if (error) + return error; + + silead_ts_read_props(client); + + /* We must have the IRQ provided by DT or ACPI subsytem */ + if (client->irq <= 0) + return -ENODEV; + + /* Power GPIO pin */ + data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); + if (IS_ERR(data->gpio_power)) { + if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER) + dev_err(dev, "Shutdown GPIO request failed\n"); + return PTR_ERR(data->gpio_power); + } + + error = silead_ts_setup(client); + if (error) + return error; + + error = silead_ts_request_input_dev(data); + if (error) + return error; + + error = devm_request_threaded_irq(dev, client->irq, + NULL, silead_ts_threaded_irq_handler, + IRQF_ONESHOT, client->name, data); + if (error) { + if (error != -EPROBE_DEFER) + dev_err(dev, "IRQ request failed %d\n", error); + return error; + } + + return 0; +} + +static int __maybe_unused silead_ts_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + silead_ts_set_power(client, SILEAD_POWER_OFF); + return 0; +} + +static int __maybe_unused silead_ts_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error, status; + + silead_ts_set_power(client, SILEAD_POWER_ON); + + error = silead_ts_reset(client); + if (error) + return error; + + error = silead_ts_startup(client); + if (error) + return error; + + status = silead_ts_get_status(client); + if (status != SILEAD_STATUS_OK) { + dev_err(dev, "Resume error, status: 0x%02x\n", status); + return -ENODEV; + } + + return 0; +} + +static SIMPLE_DEV_PM_OPS(silead_ts_pm, silead_ts_suspend, silead_ts_resume); + +static const struct i2c_device_id silead_ts_id[] = { + { "gsl1680", 0 }, + { "gsl1688", 0 }, + { "gsl3670", 0 }, + { "gsl3675", 0 }, + { "gsl3692", 0 }, + { "mssl1680", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, silead_ts_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id silead_ts_acpi_match[] = { + { "GSL1680", 0 }, + { "GSL1688", 0 }, + { "GSL3670", 0 }, + { "GSL3675", 0 }, + { "GSL3692", 0 }, + { "MSSL1680", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, silead_ts_acpi_match); +#endif + +static struct i2c_driver silead_ts_driver = { + .probe = silead_ts_probe, + .id_table = silead_ts_id, + .driver = { + .name = SILEAD_TS_NAME, + .acpi_match_table = ACPI_PTR(silead_ts_acpi_match), + .pm = &silead_ts_pm, + }, +}; +module_i2c_driver(silead_ts_driver); + +MODULE_AUTHOR("Robert Dolca <robert.dolca@intel.com>"); +MODULE_DESCRIPTION("Silead I2C touchscreen driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/sis_i2c.c b/drivers/input/touchscreen/sis_i2c.c new file mode 100644 index 000000000000..8d93f8c9a403 --- /dev/null +++ b/drivers/input/touchscreen/sis_i2c.c @@ -0,0 +1,413 @@ +/* + * Touch Screen driver for SiS 9200 family I2C Touch panels + * + * Copyright (C) 2015 SiS, Inc. + * Copyright (C) 2016 Nextfour Group + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/crc-itu-t.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/interrupt.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/unaligned.h> + +#define SIS_I2C_NAME "sis_i2c_ts" + +/* + * The I2C packet format: + * le16 byte count + * u8 Report ID + * <contact data - variable length> + * u8 Number of contacts + * le16 Scan Time (optional) + * le16 CRC + * + * One touch point information consists of 6+ bytes, the order is: + * u8 contact state + * u8 finger id + * le16 x axis + * le16 y axis + * u8 contact width (optional) + * u8 contact height (optional) + * u8 pressure (optional) + * + * Maximum amount of data transmitted in one shot is 64 bytes, if controller + * needs to report more contacts than fit in one packet it will send true + * number of contacts in first packet and 0 as number of contacts in second + * packet. + */ + +#define SIS_MAX_PACKET_SIZE 64 + +#define SIS_PKT_LEN_OFFSET 0 +#define SIS_PKT_REPORT_OFFSET 2 /* Report ID/type */ +#define SIS_PKT_CONTACT_OFFSET 3 /* First contact */ + +#define SIS_SCAN_TIME_LEN 2 + +/* Supported report types */ +#define SIS_ALL_IN_ONE_PACKAGE 0x10 +#define SIS_PKT_IS_TOUCH(x) (((x) & 0x0f) == 0x01) +#define SIS_PKT_IS_HIDI2C(x) (((x) & 0x0f) == 0x06) + +/* Contact properties within report */ +#define SIS_PKT_HAS_AREA(x) ((x) & BIT(4)) +#define SIS_PKT_HAS_PRESSURE(x) ((x) & BIT(5)) +#define SIS_PKT_HAS_SCANTIME(x) ((x) & BIT(6)) + +/* Contact size */ +#define SIS_BASE_LEN_PER_CONTACT 6 +#define SIS_AREA_LEN_PER_CONTACT 2 +#define SIS_PRESSURE_LEN_PER_CONTACT 1 + +/* Offsets within contact data */ +#define SIS_CONTACT_STATUS_OFFSET 0 +#define SIS_CONTACT_ID_OFFSET 1 /* Contact ID */ +#define SIS_CONTACT_X_OFFSET 2 +#define SIS_CONTACT_Y_OFFSET 4 +#define SIS_CONTACT_WIDTH_OFFSET 6 +#define SIS_CONTACT_HEIGHT_OFFSET 7 +#define SIS_CONTACT_PRESSURE_OFFSET(id) (SIS_PKT_HAS_AREA(id) ? 8 : 6) + +/* Individual contact state */ +#define SIS_STATUS_UP 0x0 +#define SIS_STATUS_DOWN 0x3 + +/* Touchscreen parameters */ +#define SIS_MAX_FINGERS 10 +#define SIS_MAX_X 4095 +#define SIS_MAX_Y 4095 +#define SIS_MAX_PRESSURE 255 + +/* Resolution diagonal */ +#define SIS_AREA_LENGTH_LONGER 5792 +/*((SIS_MAX_X^2) + (SIS_MAX_Y^2))^0.5*/ +#define SIS_AREA_LENGTH_SHORT 5792 +#define SIS_AREA_UNIT (5792 / 32) + +struct sis_ts_data { + struct i2c_client *client; + struct input_dev *input; + + struct gpio_desc *attn_gpio; + struct gpio_desc *reset_gpio; + + u8 packet[SIS_MAX_PACKET_SIZE]; +}; + +static int sis_read_packet(struct i2c_client *client, u8 *buf, + unsigned int *num_contacts, + unsigned int *contact_size) +{ + int count_idx; + int ret; + u16 len; + u16 crc, pkg_crc; + u8 report_id; + + ret = i2c_master_recv(client, buf, SIS_MAX_PACKET_SIZE); + if (ret <= 0) + return -EIO; + + len = get_unaligned_le16(&buf[SIS_PKT_LEN_OFFSET]); + if (len > SIS_MAX_PACKET_SIZE) { + dev_err(&client->dev, + "%s: invalid packet length (%d vs %d)\n", + __func__, len, SIS_MAX_PACKET_SIZE); + return -E2BIG; + } + + if (len < 10) + return -EINVAL; + + report_id = buf[SIS_PKT_REPORT_OFFSET]; + count_idx = len - 1; + *contact_size = SIS_BASE_LEN_PER_CONTACT; + + if (report_id != SIS_ALL_IN_ONE_PACKAGE) { + if (SIS_PKT_IS_TOUCH(report_id)) { + /* + * Calculate CRC ignoring packet length + * in the beginning and CRC transmitted + * at the end of the packet. + */ + crc = crc_itu_t(0, buf + 2, len - 2 - 2); + pkg_crc = get_unaligned_le16(&buf[len - 2]); + + if (crc != pkg_crc) { + dev_err(&client->dev, + "%s: CRC Error (%d vs %d)\n", + __func__, crc, pkg_crc); + return -EINVAL; + } + + count_idx -= 2; + + } else if (!SIS_PKT_IS_HIDI2C(report_id)) { + dev_err(&client->dev, + "%s: invalid packet ID %#02x\n", + __func__, report_id); + return -EINVAL; + } + + if (SIS_PKT_HAS_SCANTIME(report_id)) + count_idx -= SIS_SCAN_TIME_LEN; + + if (SIS_PKT_HAS_AREA(report_id)) + *contact_size += SIS_AREA_LEN_PER_CONTACT; + if (SIS_PKT_HAS_PRESSURE(report_id)) + *contact_size += SIS_PRESSURE_LEN_PER_CONTACT; + } + + *num_contacts = buf[count_idx]; + return 0; +} + +static int sis_ts_report_contact(struct sis_ts_data *ts, const u8 *data, u8 id) +{ + struct input_dev *input = ts->input; + int slot; + u8 status = data[SIS_CONTACT_STATUS_OFFSET]; + u8 pressure; + u8 height, width; + u16 x, y; + + if (status != SIS_STATUS_DOWN && status != SIS_STATUS_UP) { + dev_err(&ts->client->dev, "Unexpected touch status: %#02x\n", + data[SIS_CONTACT_STATUS_OFFSET]); + return -EINVAL; + } + + slot = input_mt_get_slot_by_key(input, data[SIS_CONTACT_ID_OFFSET]); + if (slot < 0) + return -ENOENT; + + input_mt_slot(input, slot); + input_mt_report_slot_state(input, MT_TOOL_FINGER, + status == SIS_STATUS_DOWN); + + if (status == SIS_STATUS_DOWN) { + pressure = height = width = 1; + if (id != SIS_ALL_IN_ONE_PACKAGE) { + if (SIS_PKT_HAS_AREA(id)) { + width = data[SIS_CONTACT_WIDTH_OFFSET]; + height = data[SIS_CONTACT_HEIGHT_OFFSET]; + } + + if (SIS_PKT_HAS_PRESSURE(id)) + pressure = + data[SIS_CONTACT_PRESSURE_OFFSET(id)]; + } + + x = get_unaligned_le16(&data[SIS_CONTACT_X_OFFSET]); + y = get_unaligned_le16(&data[SIS_CONTACT_Y_OFFSET]); + + input_report_abs(input, ABS_MT_TOUCH_MAJOR, + width * SIS_AREA_UNIT); + input_report_abs(input, ABS_MT_TOUCH_MINOR, + height * SIS_AREA_UNIT); + input_report_abs(input, ABS_MT_PRESSURE, pressure); + input_report_abs(input, ABS_MT_POSITION_X, x); + input_report_abs(input, ABS_MT_POSITION_Y, y); + } + + return 0; +} + +static void sis_ts_handle_packet(struct sis_ts_data *ts) +{ + const u8 *contact; + unsigned int num_to_report = 0; + unsigned int num_contacts; + unsigned int num_reported; + unsigned int contact_size; + int error; + u8 report_id; + + do { + error = sis_read_packet(ts->client, ts->packet, + &num_contacts, &contact_size); + if (error) + break; + + if (num_to_report == 0) { + num_to_report = num_contacts; + } else if (num_contacts != 0) { + dev_err(&ts->client->dev, + "%s: nonzero (%d) point count in tail packet\n", + __func__, num_contacts); + break; + } + + report_id = ts->packet[SIS_PKT_REPORT_OFFSET]; + contact = &ts->packet[SIS_PKT_CONTACT_OFFSET]; + num_reported = 0; + + while (num_to_report > 0) { + error = sis_ts_report_contact(ts, contact, report_id); + if (error) + break; + + contact += contact_size; + num_to_report--; + num_reported++; + + if (report_id != SIS_ALL_IN_ONE_PACKAGE && + num_reported >= 5) { + /* + * The remainder of contacts is sent + * in the 2nd packet. + */ + break; + } + } + } while (num_to_report > 0); + + input_mt_sync_frame(ts->input); + input_sync(ts->input); +} + +static irqreturn_t sis_ts_irq_handler(int irq, void *dev_id) +{ + struct sis_ts_data *ts = dev_id; + + do { + sis_ts_handle_packet(ts); + } while (ts->attn_gpio && gpiod_get_value_cansleep(ts->attn_gpio)); + + return IRQ_HANDLED; +} + +static void sis_ts_reset(struct sis_ts_data *ts) +{ + if (ts->reset_gpio) { + /* Get out of reset */ + usleep_range(1000, 2000); + gpiod_set_value(ts->reset_gpio, 1); + usleep_range(1000, 2000); + gpiod_set_value(ts->reset_gpio, 0); + msleep(100); + } +} + +static int sis_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct sis_ts_data *ts; + struct input_dev *input; + int error; + + ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->client = client; + i2c_set_clientdata(client, ts); + + ts->attn_gpio = devm_gpiod_get_optional(&client->dev, + "attn", GPIOD_IN); + if (IS_ERR(ts->attn_gpio)) { + error = PTR_ERR(ts->attn_gpio); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get attention GPIO: %d\n", error); + return error; + } + + ts->reset_gpio = devm_gpiod_get_optional(&client->dev, + "reset", GPIOD_OUT_LOW); + if (IS_ERR(ts->reset_gpio)) { + error = PTR_ERR(ts->reset_gpio); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get reset GPIO: %d\n", error); + return error; + } + + sis_ts_reset(ts); + + ts->input = input = devm_input_allocate_device(&client->dev); + if (!input) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + input->name = "SiS Touchscreen"; + input->id.bustype = BUS_I2C; + + input_set_abs_params(input, ABS_MT_POSITION_X, 0, SIS_MAX_X, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, SIS_MAX_Y, 0, 0); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, SIS_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, + 0, SIS_AREA_LENGTH_LONGER, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, + 0, SIS_AREA_LENGTH_SHORT, 0, 0); + + error = input_mt_init_slots(input, SIS_MAX_FINGERS, INPUT_MT_DIRECT); + if (error) { + dev_err(&client->dev, + "Failed to initialize MT slots: %d\n", error); + return error; + } + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, sis_ts_irq_handler, + IRQF_ONESHOT, + client->name, ts); + if (error) { + dev_err(&client->dev, "Failed to request IRQ: %d\n", error); + return error; + } + + error = input_register_device(ts->input); + if (error) { + dev_err(&client->dev, + "Failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id sis_ts_dt_ids[] = { + { .compatible = "sis,9200-ts" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sis_ts_dt_ids); +#endif + +static const struct i2c_device_id sis_ts_id[] = { + { SIS_I2C_NAME, 0 }, + { "9200-ts", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, sis_ts_id); + +static struct i2c_driver sis_ts_driver = { + .driver = { + .name = SIS_I2C_NAME, + .of_match_table = of_match_ptr(sis_ts_dt_ids), + }, + .probe = sis_ts_probe, + .id_table = sis_ts_id, +}; +module_i2c_driver(sis_ts_driver); + +MODULE_DESCRIPTION("SiS 9200 Family Touchscreen Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Mika Penttilä <mika.penttila@nextfour.com>"); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 33c177ba93be..96de97a46079 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2375,7 +2375,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; @@ -2398,7 +2398,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, * The exported unmap_single function for dma_ops. */ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; @@ -2444,7 +2444,7 @@ static int sg_num_pages(struct device *dev, */ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int mapped_pages = 0, npages = 0, prot = 0, i; struct protection_domain *domain; @@ -2525,7 +2525,7 @@ out_err: */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; @@ -2548,7 +2548,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, */ static void *alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; @@ -2604,7 +2604,7 @@ out_free: */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index ea5a9ebf0f78..08a1e2f3690f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -286,7 +286,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * or NULL on failure. */ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - struct dma_attrs *attrs, int prot, dma_addr_t *handle, + unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -306,7 +306,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, } else { size = ALIGN(size, min_size); } - if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) alloc_sizes = min_size; count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -400,7 +400,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); } @@ -560,7 +560,7 @@ out_restore_sg: } void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { /* * The scatterlist segments are mapped into a single diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index afbaa2c69a59..ebb5bf3ddbd9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3552,7 +3552,7 @@ error: static dma_addr_t intel_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return __intel_map_single(dev, page_to_phys(page) + offset, size, dir, *dev->dma_mask); @@ -3711,14 +3711,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { struct page *page = NULL; int order; @@ -3764,7 +3764,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, } static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { int order; struct page *page = virt_to_page(vaddr); @@ -3779,7 +3779,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK; unsigned long nrpages = 0; @@ -3808,7 +3808,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, } static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { int i; struct dmar_domain *domain; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 5495a5ba8039..7f8728984f44 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -21,9 +21,9 @@ config ARM_GIC_MAX_NR config ARM_GIC_V2M bool - depends on ARM_GIC - depends on PCI && PCI_MSI - select PCI_MSI_IRQ_DOMAIN + depends on PCI + select ARM_GIC + select PCI_MSI config GIC_NON_BANKED bool @@ -37,7 +37,8 @@ config ARM_GIC_V3 config ARM_GIC_V3_ITS bool - select PCI_MSI_IRQ_DOMAIN + depends on PCI + depends on PCI_MSI config ARM_NVIC bool @@ -62,13 +63,13 @@ config ARM_VIC_NR config ARMADA_370_XP_IRQ bool select GENERIC_IRQ_CHIP - select PCI_MSI_IRQ_DOMAIN if PCI_MSI + select PCI_MSI if PCI config ALPINE_MSI bool - depends on PCI && PCI_MSI + depends on PCI + select PCI_MSI select GENERIC_IRQ_CHIP - select PCI_MSI_IRQ_DOMAIN config ATMEL_AIC_IRQ bool @@ -117,7 +118,6 @@ config HISILICON_IRQ_MBIGEN bool select ARM_GIC_V3 select ARM_GIC_V3_ITS - select GENERIC_MSI_IRQ_DOMAIN config IMGPDC_IRQ bool @@ -250,12 +250,10 @@ config IRQ_MXS config MVEBU_ODMI bool - select GENERIC_MSI_IRQ_DOMAIN config LS_SCFG_MSI def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE depends on PCI && PCI_MSI - select PCI_MSI_IRQ_DOMAIN config PARTITION_PERCPU bool diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 3786d0f21972..c5f33c3bd228 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -359,7 +359,7 @@ static void gic_handle_shared_int(bool chained) pending_reg += gic_reg_step; intrmask_reg += gic_reg_step; - if (!config_enabled(CONFIG_64BIT) || mips_cm_is64) + if (!IS_ENABLED(CONFIG_64BIT) || mips_cm_is64) continue; pending[i] |= (u64)gic_read(pending_reg) << 32; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 29b99fb6a16a..19db13e99466 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -289,10 +289,16 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal. + * Map reads as normal only if corrupt_bio_byte set. */ - if (bio_data_dir(bio) == READ) - goto map_bio; + if (bio_data_dir(bio) == READ) { + /* If flags were specified, only corrupt those that match. */ + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) + goto map_bio; + else + return -EIO; + } /* * Drop writes? @@ -330,12 +336,13 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) /* * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && pb->bio_submitted && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (fc->corrupt_bio_byte) + corrupt_bio_data(bio, fc); + else + return -EIO; + } return error; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7eac080fcb18..d7107d23b897 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -507,13 +507,27 @@ static bool __must_push_back(struct multipath *m) static bool must_push_back_rq(struct multipath *m) { - return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || - __must_push_back(m)); + bool r; + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || + __must_push_back(m)); + spin_unlock_irqrestore(&m->lock, flags); + + return r; } static bool must_push_back_bio(struct multipath *m) { - return __must_push_back(m); + bool r; + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + r = __must_push_back(m); + spin_unlock_irqrestore(&m->lock, flags); + + return r; } /* @@ -1680,12 +1694,14 @@ static void multipath_postsuspend(struct dm_target *ti) static void multipath_resume(struct dm_target *ti) { struct multipath *m = ti->private; + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); else clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - smp_mb__after_atomic(); + spin_unlock_irqrestore(&m->lock, flags); } /* diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 84983549b5e1..1b9795d75ef8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1270,7 +1270,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, } rs->md.sync_speed_min = (int)value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) { - if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) { + if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) { rs->ti->error = "Only one max_recovery_rate argument pair allowed"; return -EINVAL; } @@ -1960,6 +1960,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->data_offset = cpu_to_le64(rdev->data_offset); sb->new_data_offset = cpu_to_le64(rdev->new_data_offset); sb->sectors = cpu_to_le64(rdev->sectors); + sb->incompat_features = cpu_to_le32(0); /* Zero out the rest of the payload after the size of the superblock */ memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); @@ -3577,7 +3578,6 @@ static int raid_preresume(struct dm_target *ti) /* Be prepared for mddev_resume() in raid_resume() */ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { - set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); mddev->resync_min = mddev->recovery_cp; } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7a9661868496..1ca7463e8bb2 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -78,6 +78,7 @@ void dm_start_queue(struct request_queue *q) if (!q->mq_ops) dm_old_start_queue(q); else { + queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q); blk_mq_start_stopped_hw_queues(q, true); blk_mq_kick_requeue_list(q); } @@ -101,8 +102,14 @@ void dm_stop_queue(struct request_queue *q) { if (!q->mq_ops) dm_old_stop_queue(q); - else + else { + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_STOPPED, q); + spin_unlock_irq(q->queue_lock); + + blk_mq_cancel_requeue_work(q); blk_mq_stop_hw_queues(q); + } } static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, @@ -864,6 +871,17 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, dm_put_live_table(md, srcu_idx); } + /* + * On suspend dm_stop_queue() handles stopping the blk-mq + * request_queue BUT: even though the hw_queues are marked + * BLK_MQ_S_STOPPED at that point there is still a race that + * is allowing block/blk-mq.c to call ->queue_rq against a + * hctx that it really shouldn't. The following check guards + * against this rarity (albeit _not_ race-free). + */ + if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) + return BLK_MQ_RQ_QUEUE_BUSY; + if (ti->type->busy && ti->type->busy(ti)) return BLK_MQ_RQ_QUEUE_BUSY; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 25d1d97154a8..dfa09e14e847 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2082,7 +2082,8 @@ static void unlock_fs(struct mapped_device *md) * Caller must hold md->suspend_lock */ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, - unsigned suspend_flags, int interruptible) + unsigned suspend_flags, int interruptible, + int dmf_suspended_flag) { bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; @@ -2149,6 +2150,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, * to finish. */ r = dm_wait_for_completion(md, interruptible); + if (!r) + set_bit(dmf_suspended_flag, &md->flags); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); @@ -2210,12 +2213,10 @@ retry: map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); - r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); + r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); if (r) goto out_unlock; - set_bit(DMF_SUSPENDED, &md->flags); - dm_table_postsuspend_targets(map); out_unlock: @@ -2309,9 +2310,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla * would require changing .presuspend to return an error -- avoid this * until there is a need for more elaborate variants of internal suspend. */ - (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); - - set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE, + DMF_SUSPENDED_INTERNALLY); dm_table_postsuspend_targets(map); } diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 09c39346167f..ffe88bc6b813 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -3378,20 +3378,28 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, ret = cxd2841er_get_carrier_offset_i( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBT: ret = cxd2841er_get_carrier_offset_t( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBT2: ret = cxd2841er_get_carrier_offset_t2( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBC_ANNEX_A: ret = cxd2841er_get_carrier_offset_c( priv, &carrier_offset); + if (ret) + return ret; break; default: dev_dbg(&priv->i2c->dev, @@ -3399,8 +3407,6 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, __func__, priv->system); return -EINVAL; } - if (ret) - return ret; dev_dbg(&priv->i2c->dev, "%s(): carrier offset %d\n", __func__, carrier_offset); p->frequency += carrier_offset; diff --git a/drivers/media/i2c/adv7180.c b/drivers/media/i2c/adv7180.c index b77b0a4dbf68..95cbc857f36e 100644 --- a/drivers/media/i2c/adv7180.c +++ b/drivers/media/i2c/adv7180.c @@ -100,7 +100,7 @@ #define ADV7180_REG_IDENT 0x0011 #define ADV7180_ID_7180 0x18 -#define ADV7180_REG_ICONF1 0x0040 +#define ADV7180_REG_ICONF1 0x2040 #define ADV7180_ICONF1_ACTIVE_LOW 0x01 #define ADV7180_ICONF1_PSYNC_ONLY 0x10 #define ADV7180_ICONF1_ACTIVE_TO_CLR 0xC0 @@ -113,15 +113,15 @@ #define ADV7180_IRQ1_LOCK 0x01 #define ADV7180_IRQ1_UNLOCK 0x02 -#define ADV7180_REG_ISR1 0x0042 -#define ADV7180_REG_ICR1 0x0043 -#define ADV7180_REG_IMR1 0x0044 -#define ADV7180_REG_IMR2 0x0048 +#define ADV7180_REG_ISR1 0x2042 +#define ADV7180_REG_ICR1 0x2043 +#define ADV7180_REG_IMR1 0x2044 +#define ADV7180_REG_IMR2 0x2048 #define ADV7180_IRQ3_AD_CHANGE 0x08 -#define ADV7180_REG_ISR3 0x004A -#define ADV7180_REG_ICR3 0x004B -#define ADV7180_REG_IMR3 0x004C -#define ADV7180_REG_IMR4 0x50 +#define ADV7180_REG_ISR3 0x204A +#define ADV7180_REG_ICR3 0x204B +#define ADV7180_REG_IMR3 0x204C +#define ADV7180_REG_IMR4 0x2050 #define ADV7180_REG_NTSC_V_BIT_END 0x00E6 #define ADV7180_NTSC_V_BIT_END_MANUAL_NVEND 0x4F diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index 6d7cad54a65d..53030d631653 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -1041,6 +1041,8 @@ static int adv7511_s_dv_timings(struct v4l2_subdev *sd, struct v4l2_dv_timings *timings) { struct adv7511_state *state = get_adv7511_state(sd); + struct v4l2_bt_timings *bt = &timings->bt; + u32 fps; v4l2_dbg(1, debug, sd, "%s:\n", __func__); @@ -1052,15 +1054,29 @@ static int adv7511_s_dv_timings(struct v4l2_subdev *sd, if the format is one of the CEA or DMT timings. */ v4l2_find_dv_timings_cap(timings, &adv7511_timings_cap, 0, NULL, NULL); - timings->bt.flags &= ~V4L2_DV_FL_REDUCED_FPS; - /* save timings */ state->dv_timings = *timings; /* set h/vsync polarities */ adv7511_wr_and_or(sd, 0x17, 0x9f, - ((timings->bt.polarities & V4L2_DV_VSYNC_POS_POL) ? 0 : 0x40) | - ((timings->bt.polarities & V4L2_DV_HSYNC_POS_POL) ? 0 : 0x20)); + ((bt->polarities & V4L2_DV_VSYNC_POS_POL) ? 0 : 0x40) | + ((bt->polarities & V4L2_DV_HSYNC_POS_POL) ? 0 : 0x20)); + + fps = (u32)bt->pixelclock / (V4L2_DV_BT_FRAME_WIDTH(bt) * V4L2_DV_BT_FRAME_HEIGHT(bt)); + switch (fps) { + case 24: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 1 << 1); + break; + case 25: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 2 << 1); + break; + case 30: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 3 << 1); + break; + default: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 0); + break; + } /* update quantization range based on new dv_timings */ adv7511_set_rgb_quantization_mode(sd, state->rgb_quantization_range_ctrl); diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index e277b7c23516..c7806ecda2dd 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -246,7 +246,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) struct video_device *vfd_enc; struct resource *res; int i, j, ret; - DEFINE_DMA_ATTRS(attrs); dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) @@ -378,9 +377,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) goto err_enc_reg; } - /* Avoid the iommu eat big hunks */ - dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs); - mtk_v4l2_debug(0, "encoder registered as /dev/video%d", vfd_enc->num); diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index 3df66d11c795..b7892f3efd98 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -430,14 +430,11 @@ int bdisp_hw_get_and_clear_irq(struct bdisp_dev *bdisp) */ void bdisp_hw_free_nodes(struct bdisp_ctx *ctx) { - if (ctx && ctx->node[0]) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + if (ctx && ctx->node[0]) dma_free_attrs(ctx->bdisp_dev->dev, sizeof(struct bdisp_node) * MAX_NB_NODE, - ctx->node[0], ctx->node_paddr[0], &attrs); - } + ctx->node[0], ctx->node_paddr[0], + DMA_ATTR_WRITE_COMBINE); } /** @@ -455,12 +452,10 @@ int bdisp_hw_alloc_nodes(struct bdisp_ctx *ctx) unsigned int i, node_size = sizeof(struct bdisp_node); void *base; dma_addr_t paddr; - DEFINE_DMA_ATTRS(attrs); /* Allocate all the nodes within a single memory page */ - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); base = dma_alloc_attrs(dev, node_size * MAX_NB_NODE, &paddr, - GFP_KERNEL | GFP_DMA, &attrs); + GFP_KERNEL | GFP_DMA, DMA_ATTR_WRITE_COMBINE); if (!base) { dev_err(dev, "%s no mem\n", __func__); return -ENOMEM; @@ -493,13 +488,9 @@ void bdisp_hw_free_filters(struct device *dev) { int size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER); - if (bdisp_h_filter[0].virt) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + if (bdisp_h_filter[0].virt) dma_free_attrs(dev, size, bdisp_h_filter[0].virt, - bdisp_h_filter[0].paddr, &attrs); - } + bdisp_h_filter[0].paddr, DMA_ATTR_WRITE_COMBINE); } /** @@ -516,12 +507,11 @@ int bdisp_hw_alloc_filters(struct device *dev) unsigned int i, size; void *base; dma_addr_t paddr; - DEFINE_DMA_ATTRS(attrs); /* Allocate all the filters within a single memory page */ size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - base = dma_alloc_attrs(dev, size, &paddr, GFP_KERNEL | GFP_DMA, &attrs); + base = dma_alloc_attrs(dev, size, &paddr, GFP_KERNEL | GFP_DMA, + DMA_ATTR_WRITE_COMBINE); if (!base) return -ENOMEM; diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c index 6b17015048ae..cd0ff4a66fdc 100644 --- a/drivers/media/platform/vim2m.c +++ b/drivers/media/platform/vim2m.c @@ -171,6 +171,9 @@ struct vim2m_ctx { int mode; enum v4l2_colorspace colorspace; + enum v4l2_ycbcr_encoding ycbcr_enc; + enum v4l2_xfer_func xfer_func; + enum v4l2_quantization quant; /* Source and destination queue data */ struct vim2m_q_data q_data[2]; @@ -493,6 +496,9 @@ static int vidioc_g_fmt(struct vim2m_ctx *ctx, struct v4l2_format *f) f->fmt.pix.bytesperline = (q_data->width * q_data->fmt->depth) >> 3; f->fmt.pix.sizeimage = q_data->sizeimage; f->fmt.pix.colorspace = ctx->colorspace; + f->fmt.pix.xfer_func = ctx->xfer_func; + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; + f->fmt.pix.quantization = ctx->quant; return 0; } @@ -549,6 +555,9 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, return -EINVAL; } f->fmt.pix.colorspace = ctx->colorspace; + f->fmt.pix.xfer_func = ctx->xfer_func; + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; + f->fmt.pix.quantization = ctx->quant; return vidioc_try_fmt(f, fmt); } @@ -630,8 +639,12 @@ static int vidioc_s_fmt_vid_out(struct file *file, void *priv, return ret; ret = vidioc_s_fmt(file2ctx(file), f); - if (!ret) + if (!ret) { ctx->colorspace = f->fmt.pix.colorspace; + ctx->xfer_func = f->fmt.pix.xfer_func; + ctx->ycbcr_enc = f->fmt.pix.ycbcr_enc; + ctx->quant = f->fmt.pix.quantization; + } return ret; } diff --git a/drivers/media/platform/vivid/vivid-cec.c b/drivers/media/platform/vivid/vivid-cec.c index 66aa7292076b..f9f878b8e0a7 100644 --- a/drivers/media/platform/vivid/vivid-cec.c +++ b/drivers/media/platform/vivid/vivid-cec.c @@ -169,7 +169,6 @@ static int vivid_received(struct cec_adapter *adap, struct cec_msg *msg) struct vivid_dev *dev = adap->priv; struct cec_msg reply; u8 dest = cec_msg_destination(msg); - u16 pa; u8 disp_ctl; char osd[14]; @@ -178,15 +177,6 @@ static int vivid_received(struct cec_adapter *adap, struct cec_msg *msg) cec_msg_init(&reply, dest, cec_msg_initiator(msg)); switch (cec_msg_opcode(msg)) { - case CEC_MSG_SET_STREAM_PATH: - if (cec_is_sink(adap)) - return -ENOMSG; - cec_ops_set_stream_path(msg, &pa); - if (pa != adap->phys_addr) - return -ENOMSG; - cec_msg_active_source(&reply, adap->phys_addr); - cec_transmit_msg(adap, &reply, false); - break; case CEC_MSG_SET_OSD_STRING: if (!cec_is_sink(adap)) return -ENOMSG; diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index 863f658a3fa1..59fa204b15f3 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -27,7 +27,7 @@ struct vb2_dc_buf { unsigned long size; void *cookie; dma_addr_t dma_addr; - struct dma_attrs attrs; + unsigned long attrs; enum dma_data_direction dma_dir; struct sg_table *dma_sgt; struct frame_vector *vec; @@ -130,12 +130,12 @@ static void vb2_dc_put(void *buf_priv) kfree(buf->sgt_base); } dma_free_attrs(buf->dev, buf->size, buf->cookie, buf->dma_addr, - &buf->attrs); + buf->attrs); put_device(buf->dev); kfree(buf); } -static void *vb2_dc_alloc(struct device *dev, const struct dma_attrs *attrs, +static void *vb2_dc_alloc(struct device *dev, unsigned long attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { @@ -146,16 +146,16 @@ static void *vb2_dc_alloc(struct device *dev, const struct dma_attrs *attrs, return ERR_PTR(-ENOMEM); if (attrs) - buf->attrs = *attrs; + buf->attrs = attrs; buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, - GFP_KERNEL | gfp_flags, &buf->attrs); + GFP_KERNEL | gfp_flags, buf->attrs); if (!buf->cookie) { dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); kfree(buf); return ERR_PTR(-ENOMEM); } - if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &buf->attrs)) + if ((buf->attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) buf->vaddr = buf->cookie; /* Prevent the device from being released while the buffer is used */ @@ -189,7 +189,7 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) vma->vm_pgoff = 0; ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, - buf->dma_addr, buf->size, &buf->attrs); + buf->dma_addr, buf->size, buf->attrs); if (ret) { pr_err("Remapping memory failed, error: %d\n", ret); @@ -372,7 +372,7 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf) } ret = dma_get_sgtable_attrs(buf->dev, sgt, buf->cookie, buf->dma_addr, - buf->size, &buf->attrs); + buf->size, buf->attrs); if (ret < 0) { dev_err(buf->dev, "failed to get scatterlist from DMA API\n"); kfree(sgt); @@ -421,15 +421,12 @@ static void vb2_dc_put_userptr(void *buf_priv) struct page **pages; if (sgt) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); /* * No need to sync to CPU, it's already synced to the CPU * since the finish() memop will have been called before this. */ dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); pages = frame_vector_pages(buf->vec); /* sgt should exist only if vector contains pages... */ BUG_ON(IS_ERR(pages)); @@ -484,9 +481,6 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr, struct sg_table *sgt; unsigned long contig_size; unsigned long dma_align = dma_get_cache_alignment(); - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); /* Only cache aligned DMA transfers are reliable */ if (!IS_ALIGNED(vaddr | size, dma_align)) { @@ -548,7 +542,7 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr, * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (sgt->nents <= 0) { pr_err("failed to map scatterlist\n"); ret = -EIO; @@ -572,7 +566,7 @@ out: fail_map_sg: dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); fail_sgt_init: sg_free_table(sgt); @@ -749,7 +743,7 @@ EXPORT_SYMBOL_GPL(vb2_dma_contig_memops); int vb2_dma_contig_set_max_seg_size(struct device *dev, unsigned int size) { if (!dev->dma_parms) { - dev->dma_parms = kzalloc(sizeof(dev->dma_parms), GFP_KERNEL); + dev->dma_parms = kzalloc(sizeof(*dev->dma_parms), GFP_KERNEL); if (!dev->dma_parms) return -ENOMEM; } diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c index a39db8a6db7a..bd82d709ee82 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c @@ -95,7 +95,7 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf, return 0; } -static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_attrs, +static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { @@ -103,9 +103,6 @@ static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_at struct sg_table *sgt; int ret; int num_pages; - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); if (WARN_ON(dev == NULL)) return NULL; @@ -144,7 +141,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_at * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (!sgt->nents) goto fail_map; @@ -179,13 +176,10 @@ static void vb2_dma_sg_put(void *buf_priv) int i = buf->num_pages; if (atomic_dec_and_test(&buf->refcount)) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); dprintk(1, "%s: Freeing buffer of %d pages\n", __func__, buf->num_pages); dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (buf->vaddr) vm_unmap_ram(buf->vaddr, buf->num_pages); sg_free_table(buf->dma_sgt); @@ -228,10 +222,8 @@ static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr, { struct vb2_dma_sg_buf *buf; struct sg_table *sgt; - DEFINE_DMA_ATTRS(attrs); struct frame_vector *vec; - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); buf = kzalloc(sizeof *buf, GFP_KERNEL); if (!buf) return NULL; @@ -262,7 +254,7 @@ static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr, * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (!sgt->nents) goto userptr_fail_map; @@ -286,14 +278,11 @@ static void vb2_dma_sg_put_userptr(void *buf_priv) struct vb2_dma_sg_buf *buf = buf_priv; struct sg_table *sgt = &buf->sg_table; int i = buf->num_pages; - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); dprintk(1, "%s: Releasing userspace buffer of %d pages\n", __func__, buf->num_pages); dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir, - &attrs); + DMA_ATTR_SKIP_CPU_SYNC); if (buf->vaddr) vm_unmap_ram(buf->vaddr, buf->num_pages); sg_free_table(buf->dma_sgt); diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c index 7e8a07ed8d82..c2820a6e164d 100644 --- a/drivers/media/v4l2-core/videobuf2-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c @@ -33,7 +33,7 @@ struct vb2_vmalloc_buf { static void vb2_vmalloc_put(void *buf_priv); -static void *vb2_vmalloc_alloc(struct device *dev, const struct dma_attrs *attrs, +static void *vb2_vmalloc_alloc(struct device *dev, unsigned long attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig index 133712346911..4b4c0c3c3d2f 100644 --- a/drivers/memory/Kconfig +++ b/drivers/memory/Kconfig @@ -115,7 +115,7 @@ config FSL_CORENET_CF config FSL_IFC bool - depends on FSL_SOC + depends on FSL_SOC || ARCH_LAYERSCAPE config JZ4780_NEMC bool "Ingenic JZ4780 SoC NEMC driver" diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 904b4af5f142..1b182b117f9c 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -31,7 +31,9 @@ #include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/fsl_ifc.h> -#include <asm/prom.h> +#include <linux/irqdomain.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> struct fsl_ifc_ctrl *fsl_ifc_ctrl_dev; EXPORT_SYMBOL(fsl_ifc_ctrl_dev); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 40bb8ae5853c..aacf584f2a42 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2338,23 +2338,11 @@ static struct memstick_driver msb_driver = { .resume = msb_resume }; -static int major; - static int __init msb_init(void) { - int rc = register_blkdev(0, DRIVER_NAME); - - if (rc < 0) { - pr_err("failed to register major (error %d)\n", rc); - return rc; - } - - major = rc; - rc = memstick_register_driver(&msb_driver); - if (rc) { - unregister_blkdev(major, DRIVER_NAME); + int rc = memstick_register_driver(&msb_driver); + if (rc) pr_err("failed to register memstick driver (error %d)\n", rc); - } return rc; } @@ -2362,7 +2350,6 @@ static int __init msb_init(void) static void __exit msb_exit(void) { memstick_unregister_driver(&msb_driver); - unregister_blkdev(major, DRIVER_NAME); idr_destroy(&msb_disk_idr); } diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 4cf8f82cfca2..a70b853fa2c9 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -182,7 +182,7 @@ static void genwqe_dev_free(struct genwqe_dev *cd) */ static int genwqe_bus_reset(struct genwqe_dev *cd) { - int bars, rc = 0; + int rc = 0; struct pci_dev *pci_dev = cd->pci_dev; void __iomem *mmio; @@ -193,8 +193,7 @@ static int genwqe_bus_reset(struct genwqe_dev *cd) cd->mmio = NULL; pci_iounmap(pci_dev, mmio); - bars = pci_select_bars(pci_dev, IORESOURCE_MEM); - pci_release_selected_regions(pci_dev, bars); + pci_release_mem_regions(pci_dev); /* * Firmware/BIOS might change memory mapping during bus reset. @@ -218,7 +217,7 @@ static int genwqe_bus_reset(struct genwqe_dev *cd) GENWQE_INJECT_GFIR_FATAL | GENWQE_INJECT_GFIR_INFO); - rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name); + rc = pci_request_mem_regions(pci_dev, genwqe_driver_name); if (rc) { dev_err(&pci_dev->dev, "[%s] err: request bars failed (%d)\n", __func__, rc); @@ -1068,10 +1067,9 @@ static int genwqe_health_check_stop(struct genwqe_dev *cd) */ static int genwqe_pci_setup(struct genwqe_dev *cd) { - int err, bars; + int err; struct pci_dev *pci_dev = cd->pci_dev; - bars = pci_select_bars(pci_dev, IORESOURCE_MEM); err = pci_enable_device_mem(pci_dev); if (err) { dev_err(&pci_dev->dev, @@ -1080,7 +1078,7 @@ static int genwqe_pci_setup(struct genwqe_dev *cd) } /* Reserve PCI I/O and memory resources */ - err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name); + err = pci_request_mem_regions(pci_dev, genwqe_driver_name); if (err) { dev_err(&pci_dev->dev, "[%s] err: request bars failed (%d)\n", __func__, err); @@ -1142,7 +1140,7 @@ static int genwqe_pci_setup(struct genwqe_dev *cd) out_iounmap: pci_iounmap(pci_dev, cd->mmio); out_release_resources: - pci_release_selected_regions(pci_dev, bars); + pci_release_mem_regions(pci_dev); err_disable_device: pci_disable_device(pci_dev); err_out: @@ -1154,14 +1152,12 @@ static int genwqe_pci_setup(struct genwqe_dev *cd) */ static void genwqe_pci_remove(struct genwqe_dev *cd) { - int bars; struct pci_dev *pci_dev = cd->pci_dev; if (cd->mmio) pci_iounmap(pci_dev, cd->mmio); - bars = pci_select_bars(pci_dev, IORESOURCE_MEM); - pci_release_selected_regions(pci_dev, bars); + pci_release_mem_regions(pci_dev); pci_disable_device(pci_dev); } diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 89e5917e1c33..6fd9d367dea7 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -146,3 +146,7 @@ config VOP More information about the Intel MIC family as well as the Linux OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. + +if VOP +source "drivers/vhost/Kconfig.vringh" +endif diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index e047efd83f57..9599d732aff3 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -38,7 +38,7 @@ static inline struct mic_device *vpdev_to_mdev(struct device *dev) static dma_addr_t _mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct mic_device *mdev = vpdev_to_mdev(dev); @@ -48,7 +48,7 @@ _mic_dma_map_page(struct device *dev, struct page *page, static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct mic_device *mdev = vpdev_to_mdev(dev); @@ -144,7 +144,7 @@ static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev) static void *__mic_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -164,7 +164,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size, } static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -176,7 +176,7 @@ static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, static dma_addr_t __mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct scif_hw_dev *scdev = dev_get_drvdata(dev); @@ -188,7 +188,7 @@ __mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, static void __mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -198,7 +198,7 @@ __mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -229,7 +229,7 @@ err: static void __mic_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -327,7 +327,7 @@ static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev) static dma_addr_t mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct mic_device *mdev = dev_get_drvdata(dev->parent); @@ -338,7 +338,7 @@ mic_dma_map_page(struct device *dev, struct page *page, static void mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct mic_device *mdev = dev_get_drvdata(dev->parent); mic_unmap_single(mdev, dma_addr, size); diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 845dd27d9f41..377947580203 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -122,7 +122,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, for (offset = 0; offset <= master->size - blocksize; offset += blocksize) { /* Nothing more in higher memory on BCM47XX (MIPS) */ - if (config_enabled(CONFIG_BCM47XX) && offset >= 0x2000000) + if (IS_ENABLED(CONFIG_BCM47XX) && offset >= 0x2000000) break; if (curr_part >= BCM47XXPART_MAX_PARTS) { diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c index 9a1a6ffd16b8..94d3eb42c4d5 100644 --- a/drivers/mtd/chips/cfi_cmdset_0020.c +++ b/drivers/mtd/chips/cfi_cmdset_0020.c @@ -416,7 +416,7 @@ static int cfi_staa_read (struct mtd_info *mtd, loff_t from, size_t len, size_t return ret; } -static inline int do_write_buffer(struct map_info *map, struct flchip *chip, +static int do_write_buffer(struct map_info *map, struct flchip *chip, unsigned long adr, const u_char *buf, int len) { struct cfi_private *cfi = map->fldrv_priv; diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 64a248556d29..58329d2dacd1 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -113,12 +113,12 @@ config MTD_SST25L if you want to specify device partitioning. config MTD_BCM47XXSFLASH - tristate "R/O support for serial flash on BCMA bus" + tristate "Support for serial flash on BCMA bus" depends on BCMA_SFLASH && (MIPS || ARM) help BCMA bus can have various flash memories attached, they are registered by bcma as platform devices. This enables driver for - serial flash memories (only read-only mode is implemented). + serial flash memories. config MTD_SLRAM tristate "Uncached system RAM" @@ -171,18 +171,6 @@ config MTDRAM_ERASE_SIZE as a module, it is also possible to specify this as a parameter when loading the module. -#If not a module (I don't want to test it as a module) -config MTDRAM_ABS_POS - hex "SRAM Hexadecimal Absolute position or 0" - depends on MTD_MTDRAM=y - default "0" - help - If you have system RAM accessible by the CPU but not used by Linux - in normal operation, you can give the physical address at which the - available RAM starts, and the MTDRAM driver will use it instead of - allocating space from Linux's available memory. Otherwise, leave - this set to zero. Most people will want to leave this as zero. - config MTD_BLOCK2MTD tristate "MTD using block device" depends on BLOCK diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 9d6854467651..9cf7fcd28034 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -73,14 +73,15 @@ static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) return spi_write(spi, flash->command, len + 1); } -static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len, - size_t *retlen, const u_char *buf) +static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len, + const u_char *buf) { struct m25p *flash = nor->priv; struct spi_device *spi = flash->spi; struct spi_transfer t[2] = {}; struct spi_message m; int cmd_sz = m25p_cmdsz(nor); + ssize_t ret; spi_message_init(&m); @@ -98,9 +99,14 @@ static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len, t[1].len = len; spi_message_add_tail(&t[1], &m); - spi_sync(spi, &m); + ret = spi_sync(spi, &m); + if (ret) + return ret; - *retlen += m.actual_length - cmd_sz; + ret = m.actual_length - cmd_sz; + if (ret < 0) + return -EIO; + return ret; } static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor) @@ -119,21 +125,21 @@ static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor) * Read an address range from the nor chip. The address range * may be any size provided it is within the physical boundaries. */ -static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, - size_t *retlen, u_char *buf) +static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len, + u_char *buf) { struct m25p *flash = nor->priv; struct spi_device *spi = flash->spi; struct spi_transfer t[2]; struct spi_message m; unsigned int dummy = nor->read_dummy; + ssize_t ret; /* convert the dummy cycles to the number of bytes */ dummy /= 8; if (spi_flash_read_supported(spi)) { struct spi_flash_read_message msg; - int ret; memset(&msg, 0, sizeof(msg)); @@ -149,8 +155,9 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, msg.data_nbits = m25p80_rx_nbits(nor); ret = spi_flash_read(spi, &msg); - *retlen = msg.retlen; - return ret; + if (ret < 0) + return ret; + return msg.retlen; } spi_message_init(&m); @@ -165,13 +172,17 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len, t[1].rx_buf = buf; t[1].rx_nbits = m25p80_rx_nbits(nor); - t[1].len = len; + t[1].len = min(len, spi_max_transfer_size(spi)); spi_message_add_tail(&t[1], &m); - spi_sync(spi, &m); + ret = spi_sync(spi, &m); + if (ret) + return ret; - *retlen = m.actual_length - m25p_cmdsz(nor) - dummy; - return 0; + ret = m.actual_length - m25p_cmdsz(nor) - dummy; + if (ret < 0) + return -EIO; + return ret; } /* diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 22f3858c0364..3fad35942895 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -186,7 +186,7 @@ static int of_flash_probe(struct platform_device *dev) * consists internally of 2 non-identical NOR chips on one die. */ p = of_get_property(dp, "reg", &count); - if (count % reg_tuple_size != 0) { + if (!p || count % reg_tuple_size != 0) { dev_err(&dev->dev, "Malformed reg property on %s\n", dev->dev.of_node->full_name); err = -EINVAL; diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c index 744ca5cacc9b..f9fa3fad728e 100644 --- a/drivers/mtd/maps/pmcmsp-flash.c +++ b/drivers/mtd/maps/pmcmsp-flash.c @@ -75,15 +75,15 @@ static int __init init_msp_flash(void) printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt); - msp_flash = kmalloc(fcnt * sizeof(struct map_info *), GFP_KERNEL); + msp_flash = kcalloc(fcnt, sizeof(*msp_flash), GFP_KERNEL); if (!msp_flash) return -ENOMEM; - msp_parts = kmalloc(fcnt * sizeof(struct mtd_partition *), GFP_KERNEL); + msp_parts = kcalloc(fcnt, sizeof(*msp_parts), GFP_KERNEL); if (!msp_parts) goto free_msp_flash; - msp_maps = kcalloc(fcnt, sizeof(struct mtd_info), GFP_KERNEL); + msp_maps = kcalloc(fcnt, sizeof(*msp_maps), GFP_KERNEL); if (!msp_maps) goto free_msp_parts; diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c index 142fc3d79463..784c6e1a0391 100644 --- a/drivers/mtd/maps/sa1100-flash.c +++ b/drivers/mtd/maps/sa1100-flash.c @@ -230,8 +230,10 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev, info->mtd = mtd_concat_create(cdev, info->num_subdev, plat->name); - if (info->mtd == NULL) + if (info->mtd == NULL) { ret = -ENXIO; + goto err; + } } info->mtd->dev.parent = &pdev->dev; diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index f05e0e9eb2f7..21ff58099f3b 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -438,7 +438,7 @@ config MTD_NAND_FSL_ELBC config MTD_NAND_FSL_IFC tristate "NAND support for Freescale IFC controller" - depends on MTD_NAND && FSL_SOC + depends on MTD_NAND && (FSL_SOC || ARCH_LAYERSCAPE) select FSL_IFC select MEMORY help @@ -539,7 +539,6 @@ config MTD_NAND_FSMC config MTD_NAND_XWAY tristate "Support for NAND on Lantiq XWAY SoC" depends on LANTIQ && SOC_TYPE_XWAY - select MTD_NAND_PLATFORM help Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached to the External Bus Unit (EBU). @@ -563,4 +562,11 @@ config MTD_NAND_QCOM Enables support for NAND flash chips on SoCs containing the EBI2 NAND controller. This controller is found on IPQ806x SoC. +config MTD_NAND_MTK + tristate "Support for NAND controller on MTK SoCs" + depends on HAS_DMA + help + Enables support for NAND controller on MTK SoCs. + This controller is found on mt27xx, mt81xx, mt65xx SoCs. + endif # MTD_NAND diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index f55335373f7c..cafde6f3d957 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -57,5 +57,6 @@ obj-$(CONFIG_MTD_NAND_SUNXI) += sunxi_nand.o obj-$(CONFIG_MTD_NAND_HISI504) += hisi504_nand.o obj-$(CONFIG_MTD_NAND_BRCMNAND) += brcmnand/ obj-$(CONFIG_MTD_NAND_QCOM) += qcom_nandc.o +obj-$(CONFIG_MTD_NAND_MTK) += mtk_nand.o mtk_ecc.o nand-objs := nand_base.o nand_bbt.o nand_timings.o diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index b76ad7c0144f..8eb2c64df38c 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -340,6 +340,36 @@ static const u16 brcmnand_regs_v71[] = { [BRCMNAND_FC_BASE] = 0x400, }; +/* BRCMNAND v7.2 */ +static const u16 brcmnand_regs_v72[] = { + [BRCMNAND_CMD_START] = 0x04, + [BRCMNAND_CMD_EXT_ADDRESS] = 0x08, + [BRCMNAND_CMD_ADDRESS] = 0x0c, + [BRCMNAND_INTFC_STATUS] = 0x14, + [BRCMNAND_CS_SELECT] = 0x18, + [BRCMNAND_CS_XOR] = 0x1c, + [BRCMNAND_LL_OP] = 0x20, + [BRCMNAND_CS0_BASE] = 0x50, + [BRCMNAND_CS1_BASE] = 0, + [BRCMNAND_CORR_THRESHOLD] = 0xdc, + [BRCMNAND_CORR_THRESHOLD_EXT] = 0xe0, + [BRCMNAND_UNCORR_COUNT] = 0xfc, + [BRCMNAND_CORR_COUNT] = 0x100, + [BRCMNAND_CORR_EXT_ADDR] = 0x10c, + [BRCMNAND_CORR_ADDR] = 0x110, + [BRCMNAND_UNCORR_EXT_ADDR] = 0x114, + [BRCMNAND_UNCORR_ADDR] = 0x118, + [BRCMNAND_SEMAPHORE] = 0x150, + [BRCMNAND_ID] = 0x194, + [BRCMNAND_ID_EXT] = 0x198, + [BRCMNAND_LL_RDATA] = 0x19c, + [BRCMNAND_OOB_READ_BASE] = 0x200, + [BRCMNAND_OOB_READ_10_BASE] = 0, + [BRCMNAND_OOB_WRITE_BASE] = 0x400, + [BRCMNAND_OOB_WRITE_10_BASE] = 0, + [BRCMNAND_FC_BASE] = 0x600, +}; + enum brcmnand_cs_reg { BRCMNAND_CS_CFG_EXT = 0, BRCMNAND_CS_CFG, @@ -435,7 +465,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl) } /* Register offsets */ - if (ctrl->nand_version >= 0x0701) + if (ctrl->nand_version >= 0x0702) + ctrl->reg_offsets = brcmnand_regs_v72; + else if (ctrl->nand_version >= 0x0701) ctrl->reg_offsets = brcmnand_regs_v71; else if (ctrl->nand_version >= 0x0600) ctrl->reg_offsets = brcmnand_regs_v60; @@ -480,7 +512,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl) } /* Maximum spare area sector size (per 512B) */ - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0702) + ctrl->max_oob = 128; + else if (ctrl->nand_version >= 0x0600) ctrl->max_oob = 64; else if (ctrl->nand_version >= 0x0500) ctrl->max_oob = 32; @@ -583,14 +617,20 @@ static void brcmnand_wr_corr_thresh(struct brcmnand_host *host, u8 val) enum brcmnand_reg reg = BRCMNAND_CORR_THRESHOLD; int cs = host->cs; - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0702) + bits = 7; + else if (ctrl->nand_version >= 0x0600) bits = 6; else if (ctrl->nand_version >= 0x0500) bits = 5; else bits = 4; - if (ctrl->nand_version >= 0x0600) { + if (ctrl->nand_version >= 0x0702) { + if (cs >= 4) + reg = BRCMNAND_CORR_THRESHOLD_EXT; + shift = (cs % 4) * bits; + } else if (ctrl->nand_version >= 0x0600) { if (cs >= 5) reg = BRCMNAND_CORR_THRESHOLD_EXT; shift = (cs % 5) * bits; @@ -631,19 +671,28 @@ enum { static inline u32 brcmnand_spare_area_mask(struct brcmnand_controller *ctrl) { - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0702) + return GENMASK(7, 0); + else if (ctrl->nand_version >= 0x0600) return GENMASK(6, 0); else return GENMASK(5, 0); } #define NAND_ACC_CONTROL_ECC_SHIFT 16 +#define NAND_ACC_CONTROL_ECC_EXT_SHIFT 13 static inline u32 brcmnand_ecc_level_mask(struct brcmnand_controller *ctrl) { u32 mask = (ctrl->nand_version >= 0x0600) ? 0x1f : 0x0f; - return mask << NAND_ACC_CONTROL_ECC_SHIFT; + mask <<= NAND_ACC_CONTROL_ECC_SHIFT; + + /* v7.2 includes additional ECC levels */ + if (ctrl->nand_version >= 0x0702) + mask |= 0x7 << NAND_ACC_CONTROL_ECC_EXT_SHIFT; + + return mask; } static void brcmnand_set_ecc_enabled(struct brcmnand_host *host, int en) @@ -667,7 +716,9 @@ static void brcmnand_set_ecc_enabled(struct brcmnand_host *host, int en) static inline int brcmnand_sector_1k_shift(struct brcmnand_controller *ctrl) { - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0702) + return 9; + else if (ctrl->nand_version >= 0x0600) return 7; else if (ctrl->nand_version >= 0x0500) return 6; @@ -773,10 +824,16 @@ enum brcmnand_llop_type { * Internal support functions ***********************************************************************/ -static inline bool is_hamming_ecc(struct brcmnand_cfg *cfg) +static inline bool is_hamming_ecc(struct brcmnand_controller *ctrl, + struct brcmnand_cfg *cfg) { - return cfg->sector_size_1k == 0 && cfg->spare_area_size == 16 && - cfg->ecc_level == 15; + if (ctrl->nand_version <= 0x0701) + return cfg->sector_size_1k == 0 && cfg->spare_area_size == 16 && + cfg->ecc_level == 15; + else + return cfg->sector_size_1k == 0 && ((cfg->spare_area_size == 16 && + cfg->ecc_level == 15) || + (cfg->spare_area_size == 28 && cfg->ecc_level == 16)); } /* @@ -931,7 +988,7 @@ static int brcmstb_choose_ecc_layout(struct brcmnand_host *host) if (p->sector_size_1k) ecc_level <<= 1; - if (is_hamming_ecc(p)) { + if (is_hamming_ecc(host->ctrl, p)) { ecc->bytes = 3 * sectors; mtd_set_ooblayout(mtd, &brcmnand_hamming_ooblayout_ops); return 0; @@ -1108,7 +1165,7 @@ static void brcmnand_send_cmd(struct brcmnand_host *host, int cmd) ctrl->cmd_pending = cmd; intfc = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS); - BUG_ON(!(intfc & INTFC_CTLR_READY)); + WARN_ON(!(intfc & INTFC_CTLR_READY)); mb(); /* flush previous writes */ brcmnand_write_reg(ctrl, BRCMNAND_CMD_START, @@ -1545,6 +1602,56 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, return ret; } +/* + * Check a page to see if it is erased (w/ bitflips) after an uncorrectable ECC + * error + * + * Because the HW ECC signals an ECC error if an erase paged has even a single + * bitflip, we must check each ECC error to see if it is actually an erased + * page with bitflips, not a truly corrupted page. + * + * On a real error, return a negative error code (-EBADMSG for ECC error), and + * buf will contain raw data. + * Otherwise, buf gets filled with 0xffs and return the maximum number of + * bitflips-per-ECC-sector to the caller. + * + */ +static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd, + struct nand_chip *chip, void *buf, u64 addr) +{ + int i, sas; + void *oob = chip->oob_poi; + int bitflips = 0; + int page = addr >> chip->page_shift; + int ret; + + if (!buf) { + buf = chip->buffers->databuf; + /* Invalidate page cache */ + chip->pagebuf = -1; + } + + sas = mtd->oobsize / chip->ecc.steps; + + /* read without ecc for verification */ + chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); + ret = chip->ecc.read_page_raw(mtd, chip, buf, true, page); + if (ret) + return ret; + + for (i = 0; i < chip->ecc.steps; i++, oob += sas) { + ret = nand_check_erased_ecc_chunk(buf, chip->ecc.size, + oob, sas, NULL, 0, + chip->ecc.strength); + if (ret < 0) + return ret; + + bitflips = max(bitflips, ret); + } + + return bitflips; +} + static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip, u64 addr, unsigned int trans, u32 *buf, u8 *oob) { @@ -1552,9 +1659,11 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip, struct brcmnand_controller *ctrl = host->ctrl; u64 err_addr = 0; int err; + bool retry = true; dev_dbg(ctrl->dev, "read %llx -> %p\n", (unsigned long long)addr, buf); +try_dmaread: brcmnand_write_reg(ctrl, BRCMNAND_UNCORR_COUNT, 0); if (has_flash_dma(ctrl) && !oob && flash_dma_buf_ok(buf)) { @@ -1575,6 +1684,34 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip, } if (mtd_is_eccerr(err)) { + /* + * On controller version and 7.0, 7.1 , DMA read after a + * prior PIO read that reported uncorrectable error, + * the DMA engine captures this error following DMA read + * cleared only on subsequent DMA read, so just retry once + * to clear a possible false error reported for current DMA + * read + */ + if ((ctrl->nand_version == 0x0700) || + (ctrl->nand_version == 0x0701)) { + if (retry) { + retry = false; + goto try_dmaread; + } + } + + /* + * Controller version 7.2 has hw encoder to detect erased page + * bitflips, apply sw verification for older controllers only + */ + if (ctrl->nand_version < 0x0702) { + err = brcmstb_nand_verify_erased_page(mtd, chip, buf, + addr); + /* erased page bitflips corrected */ + if (err > 0) + return err; + } + dev_dbg(ctrl->dev, "uncorrectable error at 0x%llx\n", (unsigned long long)err_addr); mtd->ecc_stats.failed++; @@ -1857,7 +1994,8 @@ static int brcmnand_set_cfg(struct brcmnand_host *host, return 0; } -static void brcmnand_print_cfg(char *buf, struct brcmnand_cfg *cfg) +static void brcmnand_print_cfg(struct brcmnand_host *host, + char *buf, struct brcmnand_cfg *cfg) { buf += sprintf(buf, "%lluMiB total, %uKiB blocks, %u%s pages, %uB OOB, %u-bit", @@ -1868,7 +2006,7 @@ static void brcmnand_print_cfg(char *buf, struct brcmnand_cfg *cfg) cfg->spare_area_size, cfg->device_width); /* Account for Hamming ECC and for BCH 512B vs 1KiB sectors */ - if (is_hamming_ecc(cfg)) + if (is_hamming_ecc(host->ctrl, cfg)) sprintf(buf, ", Hamming ECC"); else if (cfg->sector_size_1k) sprintf(buf, ", BCH-%u (1KiB sector)", cfg->ecc_level << 1); @@ -1987,7 +2125,7 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) brcmnand_set_ecc_enabled(host, 1); - brcmnand_print_cfg(msg, cfg); + brcmnand_print_cfg(host, msg, cfg); dev_info(ctrl->dev, "detected %s\n", msg); /* Configure ACC_CONTROL */ @@ -1995,6 +2133,10 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) tmp = nand_readreg(ctrl, offs); tmp &= ~ACC_CONTROL_PARTIAL_PAGE; tmp &= ~ACC_CONTROL_RD_ERASED; + + /* We need to turn on Read from erased paged protected by ECC */ + if (ctrl->nand_version >= 0x0702) + tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; if (ctrl->features & BRCMNAND_HAS_PREFETCH) { /* @@ -2195,6 +2337,7 @@ static const struct of_device_id brcmnand_of_match[] = { { .compatible = "brcm,brcmnand-v6.2" }, { .compatible = "brcm,brcmnand-v7.0" }, { .compatible = "brcm,brcmnand-v7.1" }, + { .compatible = "brcm,brcmnand-v7.2" }, {}, }; MODULE_DEVICE_TABLE(of, brcmnand_of_match); diff --git a/drivers/mtd/nand/jz4780_bch.c b/drivers/mtd/nand/jz4780_bch.c index d74f4ba4a6f4..731c6051d91e 100644 --- a/drivers/mtd/nand/jz4780_bch.c +++ b/drivers/mtd/nand/jz4780_bch.c @@ -375,6 +375,6 @@ static struct platform_driver jz4780_bch_driver = { module_platform_driver(jz4780_bch_driver); MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>"); -MODULE_AUTHOR("Harvey Hunt <harvey.hunt@imgtec.com>"); +MODULE_AUTHOR("Harvey Hunt <harveyhuntnexus@gmail.com>"); MODULE_DESCRIPTION("Ingenic JZ4780 BCH error correction driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/nand/jz4780_nand.c b/drivers/mtd/nand/jz4780_nand.c index daf3c4217f4d..175f67da25af 100644 --- a/drivers/mtd/nand/jz4780_nand.c +++ b/drivers/mtd/nand/jz4780_nand.c @@ -412,6 +412,6 @@ static struct platform_driver jz4780_nand_driver = { module_platform_driver(jz4780_nand_driver); MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>"); -MODULE_AUTHOR("Harvey Hunt <harvey.hunt@imgtec.com>"); +MODULE_AUTHOR("Harvey Hunt <harveyhuntnexus@gmail.com>"); MODULE_DESCRIPTION("Ingenic JZ4780 NAND driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c new file mode 100644 index 000000000000..25a4fbd4d24a --- /dev/null +++ b/drivers/mtd/nand/mtk_ecc.c @@ -0,0 +1,530 @@ +/* + * MTK ECC controller driver. + * Copyright (C) 2016 MediaTek Inc. + * Authors: Xiaolei Li <xiaolei.li@mediatek.com> + * Jorge Ramirez-Ortiz <jorge.ramirez-ortiz@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/iopoll.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/mutex.h> + +#include "mtk_ecc.h" + +#define ECC_IDLE_MASK BIT(0) +#define ECC_IRQ_EN BIT(0) +#define ECC_OP_ENABLE (1) +#define ECC_OP_DISABLE (0) + +#define ECC_ENCCON (0x00) +#define ECC_ENCCNFG (0x04) +#define ECC_CNFG_4BIT (0) +#define ECC_CNFG_6BIT (1) +#define ECC_CNFG_8BIT (2) +#define ECC_CNFG_10BIT (3) +#define ECC_CNFG_12BIT (4) +#define ECC_CNFG_14BIT (5) +#define ECC_CNFG_16BIT (6) +#define ECC_CNFG_18BIT (7) +#define ECC_CNFG_20BIT (8) +#define ECC_CNFG_22BIT (9) +#define ECC_CNFG_24BIT (0xa) +#define ECC_CNFG_28BIT (0xb) +#define ECC_CNFG_32BIT (0xc) +#define ECC_CNFG_36BIT (0xd) +#define ECC_CNFG_40BIT (0xe) +#define ECC_CNFG_44BIT (0xf) +#define ECC_CNFG_48BIT (0x10) +#define ECC_CNFG_52BIT (0x11) +#define ECC_CNFG_56BIT (0x12) +#define ECC_CNFG_60BIT (0x13) +#define ECC_MODE_SHIFT (5) +#define ECC_MS_SHIFT (16) +#define ECC_ENCDIADDR (0x08) +#define ECC_ENCIDLE (0x0C) +#define ECC_ENCPAR(x) (0x10 + (x) * sizeof(u32)) +#define ECC_ENCIRQ_EN (0x80) +#define ECC_ENCIRQ_STA (0x84) +#define ECC_DECCON (0x100) +#define ECC_DECCNFG (0x104) +#define DEC_EMPTY_EN BIT(31) +#define DEC_CNFG_CORRECT (0x3 << 12) +#define ECC_DECIDLE (0x10C) +#define ECC_DECENUM0 (0x114) +#define ERR_MASK (0x3f) +#define ECC_DECDONE (0x124) +#define ECC_DECIRQ_EN (0x200) +#define ECC_DECIRQ_STA (0x204) + +#define ECC_TIMEOUT (500000) + +#define ECC_IDLE_REG(op) ((op) == ECC_ENCODE ? ECC_ENCIDLE : ECC_DECIDLE) +#define ECC_CTL_REG(op) ((op) == ECC_ENCODE ? ECC_ENCCON : ECC_DECCON) +#define ECC_IRQ_REG(op) ((op) == ECC_ENCODE ? \ + ECC_ENCIRQ_EN : ECC_DECIRQ_EN) + +struct mtk_ecc { + struct device *dev; + void __iomem *regs; + struct clk *clk; + + struct completion done; + struct mutex lock; + u32 sectors; +}; + +static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc, + enum mtk_ecc_operation op) +{ + struct device *dev = ecc->dev; + u32 val; + int ret; + + ret = readl_poll_timeout_atomic(ecc->regs + ECC_IDLE_REG(op), val, + val & ECC_IDLE_MASK, + 10, ECC_TIMEOUT); + if (ret) + dev_warn(dev, "%s NOT idle\n", + op == ECC_ENCODE ? "encoder" : "decoder"); +} + +static irqreturn_t mtk_ecc_irq(int irq, void *id) +{ + struct mtk_ecc *ecc = id; + enum mtk_ecc_operation op; + u32 dec, enc; + + dec = readw(ecc->regs + ECC_DECIRQ_STA) & ECC_IRQ_EN; + if (dec) { + op = ECC_DECODE; + dec = readw(ecc->regs + ECC_DECDONE); + if (dec & ecc->sectors) { + ecc->sectors = 0; + complete(&ecc->done); + } else { + return IRQ_HANDLED; + } + } else { + enc = readl(ecc->regs + ECC_ENCIRQ_STA) & ECC_IRQ_EN; + if (enc) { + op = ECC_ENCODE; + complete(&ecc->done); + } else { + return IRQ_NONE; + } + } + + writel(0, ecc->regs + ECC_IRQ_REG(op)); + + return IRQ_HANDLED; +} + +static void mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config) +{ + u32 ecc_bit = ECC_CNFG_4BIT, dec_sz, enc_sz; + u32 reg; + + switch (config->strength) { + case 4: + ecc_bit = ECC_CNFG_4BIT; + break; + case 6: + ecc_bit = ECC_CNFG_6BIT; + break; + case 8: + ecc_bit = ECC_CNFG_8BIT; + break; + case 10: + ecc_bit = ECC_CNFG_10BIT; + break; + case 12: + ecc_bit = ECC_CNFG_12BIT; + break; + case 14: + ecc_bit = ECC_CNFG_14BIT; + break; + case 16: + ecc_bit = ECC_CNFG_16BIT; + break; + case 18: + ecc_bit = ECC_CNFG_18BIT; + break; + case 20: + ecc_bit = ECC_CNFG_20BIT; + break; + case 22: + ecc_bit = ECC_CNFG_22BIT; + break; + case 24: + ecc_bit = ECC_CNFG_24BIT; + break; + case 28: + ecc_bit = ECC_CNFG_28BIT; + break; + case 32: + ecc_bit = ECC_CNFG_32BIT; + break; + case 36: + ecc_bit = ECC_CNFG_36BIT; + break; + case 40: + ecc_bit = ECC_CNFG_40BIT; + break; + case 44: + ecc_bit = ECC_CNFG_44BIT; + break; + case 48: + ecc_bit = ECC_CNFG_48BIT; + break; + case 52: + ecc_bit = ECC_CNFG_52BIT; + break; + case 56: + ecc_bit = ECC_CNFG_56BIT; + break; + case 60: + ecc_bit = ECC_CNFG_60BIT; + break; + default: + dev_err(ecc->dev, "invalid strength %d, default to 4 bits\n", + config->strength); + } + + if (config->op == ECC_ENCODE) { + /* configure ECC encoder (in bits) */ + enc_sz = config->len << 3; + + reg = ecc_bit | (config->mode << ECC_MODE_SHIFT); + reg |= (enc_sz << ECC_MS_SHIFT); + writel(reg, ecc->regs + ECC_ENCCNFG); + + if (config->mode != ECC_NFI_MODE) + writel(lower_32_bits(config->addr), + ecc->regs + ECC_ENCDIADDR); + + } else { + /* configure ECC decoder (in bits) */ + dec_sz = (config->len << 3) + + config->strength * ECC_PARITY_BITS; + + reg = ecc_bit | (config->mode << ECC_MODE_SHIFT); + reg |= (dec_sz << ECC_MS_SHIFT) | DEC_CNFG_CORRECT; + reg |= DEC_EMPTY_EN; + writel(reg, ecc->regs + ECC_DECCNFG); + + if (config->sectors) + ecc->sectors = 1 << (config->sectors - 1); + } +} + +void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats, + int sectors) +{ + u32 offset, i, err; + u32 bitflips = 0; + + stats->corrected = 0; + stats->failed = 0; + + for (i = 0; i < sectors; i++) { + offset = (i >> 2) << 2; + err = readl(ecc->regs + ECC_DECENUM0 + offset); + err = err >> ((i % 4) * 8); + err &= ERR_MASK; + if (err == ERR_MASK) { + /* uncorrectable errors */ + stats->failed++; + continue; + } + + stats->corrected += err; + bitflips = max_t(u32, bitflips, err); + } + + stats->bitflips = bitflips; +} +EXPORT_SYMBOL(mtk_ecc_get_stats); + +void mtk_ecc_release(struct mtk_ecc *ecc) +{ + clk_disable_unprepare(ecc->clk); + put_device(ecc->dev); +} +EXPORT_SYMBOL(mtk_ecc_release); + +static void mtk_ecc_hw_init(struct mtk_ecc *ecc) +{ + mtk_ecc_wait_idle(ecc, ECC_ENCODE); + writew(ECC_OP_DISABLE, ecc->regs + ECC_ENCCON); + + mtk_ecc_wait_idle(ecc, ECC_DECODE); + writel(ECC_OP_DISABLE, ecc->regs + ECC_DECCON); +} + +static struct mtk_ecc *mtk_ecc_get(struct device_node *np) +{ + struct platform_device *pdev; + struct mtk_ecc *ecc; + + pdev = of_find_device_by_node(np); + if (!pdev || !platform_get_drvdata(pdev)) + return ERR_PTR(-EPROBE_DEFER); + + get_device(&pdev->dev); + ecc = platform_get_drvdata(pdev); + clk_prepare_enable(ecc->clk); + mtk_ecc_hw_init(ecc); + + return ecc; +} + +struct mtk_ecc *of_mtk_ecc_get(struct device_node *of_node) +{ + struct mtk_ecc *ecc = NULL; + struct device_node *np; + + np = of_parse_phandle(of_node, "ecc-engine", 0); + if (np) { + ecc = mtk_ecc_get(np); + of_node_put(np); + } + + return ecc; +} +EXPORT_SYMBOL(of_mtk_ecc_get); + +int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config) +{ + enum mtk_ecc_operation op = config->op; + int ret; + + ret = mutex_lock_interruptible(&ecc->lock); + if (ret) { + dev_err(ecc->dev, "interrupted when attempting to lock\n"); + return ret; + } + + mtk_ecc_wait_idle(ecc, op); + mtk_ecc_config(ecc, config); + writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op)); + + init_completion(&ecc->done); + writew(ECC_IRQ_EN, ecc->regs + ECC_IRQ_REG(op)); + + return 0; +} +EXPORT_SYMBOL(mtk_ecc_enable); + +void mtk_ecc_disable(struct mtk_ecc *ecc) +{ + enum mtk_ecc_operation op = ECC_ENCODE; + + /* find out the running operation */ + if (readw(ecc->regs + ECC_CTL_REG(op)) != ECC_OP_ENABLE) + op = ECC_DECODE; + + /* disable it */ + mtk_ecc_wait_idle(ecc, op); + writew(0, ecc->regs + ECC_IRQ_REG(op)); + writew(ECC_OP_DISABLE, ecc->regs + ECC_CTL_REG(op)); + + mutex_unlock(&ecc->lock); +} +EXPORT_SYMBOL(mtk_ecc_disable); + +int mtk_ecc_wait_done(struct mtk_ecc *ecc, enum mtk_ecc_operation op) +{ + int ret; + + ret = wait_for_completion_timeout(&ecc->done, msecs_to_jiffies(500)); + if (!ret) { + dev_err(ecc->dev, "%s timeout - interrupt did not arrive)\n", + (op == ECC_ENCODE) ? "encoder" : "decoder"); + return -ETIMEDOUT; + } + + return 0; +} +EXPORT_SYMBOL(mtk_ecc_wait_done); + +int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, + u8 *data, u32 bytes) +{ + dma_addr_t addr; + u32 *p, len, i; + int ret = 0; + + addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE); + ret = dma_mapping_error(ecc->dev, addr); + if (ret) { + dev_err(ecc->dev, "dma mapping error\n"); + return -EINVAL; + } + + config->op = ECC_ENCODE; + config->addr = addr; + ret = mtk_ecc_enable(ecc, config); + if (ret) { + dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE); + return ret; + } + + ret = mtk_ecc_wait_done(ecc, ECC_ENCODE); + if (ret) + goto timeout; + + mtk_ecc_wait_idle(ecc, ECC_ENCODE); + + /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */ + len = (config->strength * ECC_PARITY_BITS + 7) >> 3; + p = (u32 *)(data + bytes); + + /* write the parity bytes generated by the ECC back to the OOB region */ + for (i = 0; i < len; i++) + p[i] = readl(ecc->regs + ECC_ENCPAR(i)); +timeout: + + dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE); + mtk_ecc_disable(ecc); + + return ret; +} +EXPORT_SYMBOL(mtk_ecc_encode); + +void mtk_ecc_adjust_strength(u32 *p) +{ + u32 ecc[] = {4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36, + 40, 44, 48, 52, 56, 60}; + int i; + + for (i = 0; i < ARRAY_SIZE(ecc); i++) { + if (*p <= ecc[i]) { + if (!i) + *p = ecc[i]; + else if (*p != ecc[i]) + *p = ecc[i - 1]; + return; + } + } + + *p = ecc[ARRAY_SIZE(ecc) - 1]; +} +EXPORT_SYMBOL(mtk_ecc_adjust_strength); + +static int mtk_ecc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_ecc *ecc; + struct resource *res; + int irq, ret; + + ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL); + if (!ecc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ecc->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(ecc->regs)) { + dev_err(dev, "failed to map regs: %ld\n", PTR_ERR(ecc->regs)); + return PTR_ERR(ecc->regs); + } + + ecc->clk = devm_clk_get(dev, NULL); + if (IS_ERR(ecc->clk)) { + dev_err(dev, "failed to get clock: %ld\n", PTR_ERR(ecc->clk)); + return PTR_ERR(ecc->clk); + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return -EINVAL; + } + + ret = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "failed to set DMA mask\n"); + return ret; + } + + ret = devm_request_irq(dev, irq, mtk_ecc_irq, 0x0, "mtk-ecc", ecc); + if (ret) { + dev_err(dev, "failed to request irq\n"); + return -EINVAL; + } + + ecc->dev = dev; + mutex_init(&ecc->lock); + platform_set_drvdata(pdev, ecc); + dev_info(dev, "probed\n"); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int mtk_ecc_suspend(struct device *dev) +{ + struct mtk_ecc *ecc = dev_get_drvdata(dev); + + clk_disable_unprepare(ecc->clk); + + return 0; +} + +static int mtk_ecc_resume(struct device *dev) +{ + struct mtk_ecc *ecc = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(ecc->clk); + if (ret) { + dev_err(dev, "failed to enable clk\n"); + return ret; + } + + mtk_ecc_hw_init(ecc); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(mtk_ecc_pm_ops, mtk_ecc_suspend, mtk_ecc_resume); +#endif + +static const struct of_device_id mtk_ecc_dt_match[] = { + { .compatible = "mediatek,mt2701-ecc" }, + {}, +}; + +MODULE_DEVICE_TABLE(of, mtk_ecc_dt_match); + +static struct platform_driver mtk_ecc_driver = { + .probe = mtk_ecc_probe, + .driver = { + .name = "mtk-ecc", + .of_match_table = of_match_ptr(mtk_ecc_dt_match), +#ifdef CONFIG_PM_SLEEP + .pm = &mtk_ecc_pm_ops, +#endif + }, +}; + +module_platform_driver(mtk_ecc_driver); + +MODULE_AUTHOR("Xiaolei Li <xiaolei.li@mediatek.com>"); +MODULE_DESCRIPTION("MTK Nand ECC Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/nand/mtk_ecc.h b/drivers/mtd/nand/mtk_ecc.h new file mode 100644 index 000000000000..cbeba5cd1c13 --- /dev/null +++ b/drivers/mtd/nand/mtk_ecc.h @@ -0,0 +1,50 @@ +/* + * MTK SDG1 ECC controller + * + * Copyright (c) 2016 Mediatek + * Authors: Xiaolei Li <xiaolei.li@mediatek.com> + * Jorge Ramirez-Ortiz <jorge.ramirez-ortiz@linaro.org> + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __DRIVERS_MTD_NAND_MTK_ECC_H__ +#define __DRIVERS_MTD_NAND_MTK_ECC_H__ + +#include <linux/types.h> + +#define ECC_PARITY_BITS (14) + +enum mtk_ecc_mode {ECC_DMA_MODE = 0, ECC_NFI_MODE = 1}; +enum mtk_ecc_operation {ECC_ENCODE, ECC_DECODE}; + +struct device_node; +struct mtk_ecc; + +struct mtk_ecc_stats { + u32 corrected; + u32 bitflips; + u32 failed; +}; + +struct mtk_ecc_config { + enum mtk_ecc_operation op; + enum mtk_ecc_mode mode; + dma_addr_t addr; + u32 strength; + u32 sectors; + u32 len; +}; + +int mtk_ecc_encode(struct mtk_ecc *, struct mtk_ecc_config *, u8 *, u32); +void mtk_ecc_get_stats(struct mtk_ecc *, struct mtk_ecc_stats *, int); +int mtk_ecc_wait_done(struct mtk_ecc *, enum mtk_ecc_operation); +int mtk_ecc_enable(struct mtk_ecc *, struct mtk_ecc_config *); +void mtk_ecc_disable(struct mtk_ecc *); +void mtk_ecc_adjust_strength(u32 *); + +struct mtk_ecc *of_mtk_ecc_get(struct device_node *); +void mtk_ecc_release(struct mtk_ecc *); + +#endif diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c new file mode 100644 index 000000000000..ddaa2acb9dd7 --- /dev/null +++ b/drivers/mtd/nand/mtk_nand.c @@ -0,0 +1,1526 @@ +/* + * MTK NAND Flash controller driver. + * Copyright (C) 2016 MediaTek Inc. + * Authors: Xiaolei Li <xiaolei.li@mediatek.com> + * Jorge Ramirez-Ortiz <jorge.ramirez-ortiz@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/clk.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/mtd.h> +#include <linux/module.h> +#include <linux/iopoll.h> +#include <linux/of.h> +#include "mtk_ecc.h" + +/* NAND controller register definition */ +#define NFI_CNFG (0x00) +#define CNFG_AHB BIT(0) +#define CNFG_READ_EN BIT(1) +#define CNFG_DMA_BURST_EN BIT(2) +#define CNFG_BYTE_RW BIT(6) +#define CNFG_HW_ECC_EN BIT(8) +#define CNFG_AUTO_FMT_EN BIT(9) +#define CNFG_OP_CUST (6 << 12) +#define NFI_PAGEFMT (0x04) +#define PAGEFMT_FDM_ECC_SHIFT (12) +#define PAGEFMT_FDM_SHIFT (8) +#define PAGEFMT_SPARE_16 (0) +#define PAGEFMT_SPARE_26 (1) +#define PAGEFMT_SPARE_27 (2) +#define PAGEFMT_SPARE_28 (3) +#define PAGEFMT_SPARE_32 (4) +#define PAGEFMT_SPARE_36 (5) +#define PAGEFMT_SPARE_40 (6) +#define PAGEFMT_SPARE_44 (7) +#define PAGEFMT_SPARE_48 (8) +#define PAGEFMT_SPARE_49 (9) +#define PAGEFMT_SPARE_50 (0xa) +#define PAGEFMT_SPARE_51 (0xb) +#define PAGEFMT_SPARE_52 (0xc) +#define PAGEFMT_SPARE_62 (0xd) +#define PAGEFMT_SPARE_63 (0xe) +#define PAGEFMT_SPARE_64 (0xf) +#define PAGEFMT_SPARE_SHIFT (4) +#define PAGEFMT_SEC_SEL_512 BIT(2) +#define PAGEFMT_512_2K (0) +#define PAGEFMT_2K_4K (1) +#define PAGEFMT_4K_8K (2) +#define PAGEFMT_8K_16K (3) +/* NFI control */ +#define NFI_CON (0x08) +#define CON_FIFO_FLUSH BIT(0) +#define CON_NFI_RST BIT(1) +#define CON_BRD BIT(8) /* burst read */ +#define CON_BWR BIT(9) /* burst write */ +#define CON_SEC_SHIFT (12) +/* Timming control register */ +#define NFI_ACCCON (0x0C) +#define NFI_INTR_EN (0x10) +#define INTR_AHB_DONE_EN BIT(6) +#define NFI_INTR_STA (0x14) +#define NFI_CMD (0x20) +#define NFI_ADDRNOB (0x30) +#define NFI_COLADDR (0x34) +#define NFI_ROWADDR (0x38) +#define NFI_STRDATA (0x40) +#define STAR_EN (1) +#define STAR_DE (0) +#define NFI_CNRNB (0x44) +#define NFI_DATAW (0x50) +#define NFI_DATAR (0x54) +#define NFI_PIO_DIRDY (0x58) +#define PIO_DI_RDY (0x01) +#define NFI_STA (0x60) +#define STA_CMD BIT(0) +#define STA_ADDR BIT(1) +#define STA_BUSY BIT(8) +#define STA_EMP_PAGE BIT(12) +#define NFI_FSM_CUSTDATA (0xe << 16) +#define NFI_FSM_MASK (0xf << 16) +#define NFI_ADDRCNTR (0x70) +#define CNTR_MASK GENMASK(16, 12) +#define NFI_STRADDR (0x80) +#define NFI_BYTELEN (0x84) +#define NFI_CSEL (0x90) +#define NFI_FDML(x) (0xA0 + (x) * sizeof(u32) * 2) +#define NFI_FDMM(x) (0xA4 + (x) * sizeof(u32) * 2) +#define NFI_FDM_MAX_SIZE (8) +#define NFI_FDM_MIN_SIZE (1) +#define NFI_MASTER_STA (0x224) +#define MASTER_STA_MASK (0x0FFF) +#define NFI_EMPTY_THRESH (0x23C) + +#define MTK_NAME "mtk-nand" +#define KB(x) ((x) * 1024UL) +#define MB(x) (KB(x) * 1024UL) + +#define MTK_TIMEOUT (500000) +#define MTK_RESET_TIMEOUT (1000000) +#define MTK_MAX_SECTOR (16) +#define MTK_NAND_MAX_NSELS (2) + +struct mtk_nfc_bad_mark_ctl { + void (*bm_swap)(struct mtd_info *, u8 *buf, int raw); + u32 sec; + u32 pos; +}; + +/* + * FDM: region used to store free OOB data + */ +struct mtk_nfc_fdm { + u32 reg_size; + u32 ecc_size; +}; + +struct mtk_nfc_nand_chip { + struct list_head node; + struct nand_chip nand; + + struct mtk_nfc_bad_mark_ctl bad_mark; + struct mtk_nfc_fdm fdm; + u32 spare_per_sector; + + int nsels; + u8 sels[0]; + /* nothing after this field */ +}; + +struct mtk_nfc_clk { + struct clk *nfi_clk; + struct clk *pad_clk; +}; + +struct mtk_nfc { + struct nand_hw_control controller; + struct mtk_ecc_config ecc_cfg; + struct mtk_nfc_clk clk; + struct mtk_ecc *ecc; + + struct device *dev; + void __iomem *regs; + + struct completion done; + struct list_head chips; + + u8 *buffer; +}; + +static inline struct mtk_nfc_nand_chip *to_mtk_nand(struct nand_chip *nand) +{ + return container_of(nand, struct mtk_nfc_nand_chip, nand); +} + +static inline u8 *data_ptr(struct nand_chip *chip, const u8 *p, int i) +{ + return (u8 *)p + i * chip->ecc.size; +} + +static inline u8 *oob_ptr(struct nand_chip *chip, int i) +{ + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + u8 *poi; + + /* map the sector's FDM data to free oob: + * the beginning of the oob area stores the FDM data of bad mark sectors + */ + + if (i < mtk_nand->bad_mark.sec) + poi = chip->oob_poi + (i + 1) * mtk_nand->fdm.reg_size; + else if (i == mtk_nand->bad_mark.sec) + poi = chip->oob_poi; + else + poi = chip->oob_poi + i * mtk_nand->fdm.reg_size; + + return poi; +} + +static inline int mtk_data_len(struct nand_chip *chip) +{ + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + + return chip->ecc.size + mtk_nand->spare_per_sector; +} + +static inline u8 *mtk_data_ptr(struct nand_chip *chip, int i) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + + return nfc->buffer + i * mtk_data_len(chip); +} + +static inline u8 *mtk_oob_ptr(struct nand_chip *chip, int i) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + + return nfc->buffer + i * mtk_data_len(chip) + chip->ecc.size; +} + +static inline void nfi_writel(struct mtk_nfc *nfc, u32 val, u32 reg) +{ + writel(val, nfc->regs + reg); +} + +static inline void nfi_writew(struct mtk_nfc *nfc, u16 val, u32 reg) +{ + writew(val, nfc->regs + reg); +} + +static inline void nfi_writeb(struct mtk_nfc *nfc, u8 val, u32 reg) +{ + writeb(val, nfc->regs + reg); +} + +static inline u32 nfi_readl(struct mtk_nfc *nfc, u32 reg) +{ + return readl_relaxed(nfc->regs + reg); +} + +static inline u16 nfi_readw(struct mtk_nfc *nfc, u32 reg) +{ + return readw_relaxed(nfc->regs + reg); +} + +static inline u8 nfi_readb(struct mtk_nfc *nfc, u32 reg) +{ + return readb_relaxed(nfc->regs + reg); +} + +static void mtk_nfc_hw_reset(struct mtk_nfc *nfc) +{ + struct device *dev = nfc->dev; + u32 val; + int ret; + + /* reset all registers and force the NFI master to terminate */ + nfi_writel(nfc, CON_FIFO_FLUSH | CON_NFI_RST, NFI_CON); + + /* wait for the master to finish the last transaction */ + ret = readl_poll_timeout(nfc->regs + NFI_MASTER_STA, val, + !(val & MASTER_STA_MASK), 50, + MTK_RESET_TIMEOUT); + if (ret) + dev_warn(dev, "master active in reset [0x%x] = 0x%x\n", + NFI_MASTER_STA, val); + + /* ensure any status register affected by the NFI master is reset */ + nfi_writel(nfc, CON_FIFO_FLUSH | CON_NFI_RST, NFI_CON); + nfi_writew(nfc, STAR_DE, NFI_STRDATA); +} + +static int mtk_nfc_send_command(struct mtk_nfc *nfc, u8 command) +{ + struct device *dev = nfc->dev; + u32 val; + int ret; + + nfi_writel(nfc, command, NFI_CMD); + + ret = readl_poll_timeout_atomic(nfc->regs + NFI_STA, val, + !(val & STA_CMD), 10, MTK_TIMEOUT); + if (ret) { + dev_warn(dev, "nfi core timed out entering command mode\n"); + return -EIO; + } + + return 0; +} + +static int mtk_nfc_send_address(struct mtk_nfc *nfc, int addr) +{ + struct device *dev = nfc->dev; + u32 val; + int ret; + + nfi_writel(nfc, addr, NFI_COLADDR); + nfi_writel(nfc, 0, NFI_ROWADDR); + nfi_writew(nfc, 1, NFI_ADDRNOB); + + ret = readl_poll_timeout_atomic(nfc->regs + NFI_STA, val, + !(val & STA_ADDR), 10, MTK_TIMEOUT); + if (ret) { + dev_warn(dev, "nfi core timed out entering address mode\n"); + return -EIO; + } + + return 0; +} + +static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + u32 fmt, spare; + + if (!mtd->writesize) + return 0; + + spare = mtk_nand->spare_per_sector; + + switch (mtd->writesize) { + case 512: + fmt = PAGEFMT_512_2K | PAGEFMT_SEC_SEL_512; + break; + case KB(2): + if (chip->ecc.size == 512) + fmt = PAGEFMT_2K_4K | PAGEFMT_SEC_SEL_512; + else + fmt = PAGEFMT_512_2K; + break; + case KB(4): + if (chip->ecc.size == 512) + fmt = PAGEFMT_4K_8K | PAGEFMT_SEC_SEL_512; + else + fmt = PAGEFMT_2K_4K; + break; + case KB(8): + if (chip->ecc.size == 512) + fmt = PAGEFMT_8K_16K | PAGEFMT_SEC_SEL_512; + else + fmt = PAGEFMT_4K_8K; + break; + case KB(16): + fmt = PAGEFMT_8K_16K; + break; + default: + dev_err(nfc->dev, "invalid page len: %d\n", mtd->writesize); + return -EINVAL; + } + + /* + * the hardware will double the value for this eccsize, so we need to + * halve it + */ + if (chip->ecc.size == 1024) + spare >>= 1; + + switch (spare) { + case 16: + fmt |= (PAGEFMT_SPARE_16 << PAGEFMT_SPARE_SHIFT); + break; + case 26: + fmt |= (PAGEFMT_SPARE_26 << PAGEFMT_SPARE_SHIFT); + break; + case 27: + fmt |= (PAGEFMT_SPARE_27 << PAGEFMT_SPARE_SHIFT); + break; + case 28: + fmt |= (PAGEFMT_SPARE_28 << PAGEFMT_SPARE_SHIFT); + break; + case 32: + fmt |= (PAGEFMT_SPARE_32 << PAGEFMT_SPARE_SHIFT); + break; + case 36: + fmt |= (PAGEFMT_SPARE_36 << PAGEFMT_SPARE_SHIFT); + break; + case 40: + fmt |= (PAGEFMT_SPARE_40 << PAGEFMT_SPARE_SHIFT); + break; + case 44: + fmt |= (PAGEFMT_SPARE_44 << PAGEFMT_SPARE_SHIFT); + break; + case 48: + fmt |= (PAGEFMT_SPARE_48 << PAGEFMT_SPARE_SHIFT); + break; + case 49: + fmt |= (PAGEFMT_SPARE_49 << PAGEFMT_SPARE_SHIFT); + break; + case 50: + fmt |= (PAGEFMT_SPARE_50 << PAGEFMT_SPARE_SHIFT); + break; + case 51: + fmt |= (PAGEFMT_SPARE_51 << PAGEFMT_SPARE_SHIFT); + break; + case 52: + fmt |= (PAGEFMT_SPARE_52 << PAGEFMT_SPARE_SHIFT); + break; + case 62: + fmt |= (PAGEFMT_SPARE_62 << PAGEFMT_SPARE_SHIFT); + break; + case 63: + fmt |= (PAGEFMT_SPARE_63 << PAGEFMT_SPARE_SHIFT); + break; + case 64: + fmt |= (PAGEFMT_SPARE_64 << PAGEFMT_SPARE_SHIFT); + break; + default: + dev_err(nfc->dev, "invalid spare per sector %d\n", spare); + return -EINVAL; + } + + fmt |= mtk_nand->fdm.reg_size << PAGEFMT_FDM_SHIFT; + fmt |= mtk_nand->fdm.ecc_size << PAGEFMT_FDM_ECC_SHIFT; + nfi_writew(nfc, fmt, NFI_PAGEFMT); + + nfc->ecc_cfg.strength = chip->ecc.strength; + nfc->ecc_cfg.len = chip->ecc.size + mtk_nand->fdm.ecc_size; + + return 0; +} + +static void mtk_nfc_select_chip(struct mtd_info *mtd, int chip) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + struct mtk_nfc *nfc = nand_get_controller_data(nand); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(nand); + + if (chip < 0) + return; + + mtk_nfc_hw_runtime_config(mtd); + + nfi_writel(nfc, mtk_nand->sels[chip], NFI_CSEL); +} + +static int mtk_nfc_dev_ready(struct mtd_info *mtd) +{ + struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd)); + + if (nfi_readl(nfc, NFI_STA) & STA_BUSY) + return 0; + + return 1; +} + +static void mtk_nfc_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl) +{ + struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd)); + + if (ctrl & NAND_ALE) { + mtk_nfc_send_address(nfc, dat); + } else if (ctrl & NAND_CLE) { + mtk_nfc_hw_reset(nfc); + + nfi_writew(nfc, CNFG_OP_CUST, NFI_CNFG); + mtk_nfc_send_command(nfc, dat); + } +} + +static inline void mtk_nfc_wait_ioready(struct mtk_nfc *nfc) +{ + int rc; + u8 val; + + rc = readb_poll_timeout_atomic(nfc->regs + NFI_PIO_DIRDY, val, + val & PIO_DI_RDY, 10, MTK_TIMEOUT); + if (rc < 0) + dev_err(nfc->dev, "data not ready\n"); +} + +static inline u8 mtk_nfc_read_byte(struct mtd_info *mtd) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + u32 reg; + + /* after each byte read, the NFI_STA reg is reset by the hardware */ + reg = nfi_readl(nfc, NFI_STA) & NFI_FSM_MASK; + if (reg != NFI_FSM_CUSTDATA) { + reg = nfi_readw(nfc, NFI_CNFG); + reg |= CNFG_BYTE_RW | CNFG_READ_EN; + nfi_writew(nfc, reg, NFI_CNFG); + + /* + * set to max sector to allow the HW to continue reading over + * unaligned accesses + */ + reg = (MTK_MAX_SECTOR << CON_SEC_SHIFT) | CON_BRD; + nfi_writel(nfc, reg, NFI_CON); + + /* trigger to fetch data */ + nfi_writew(nfc, STAR_EN, NFI_STRDATA); + } + + mtk_nfc_wait_ioready(nfc); + + return nfi_readb(nfc, NFI_DATAR); +} + +static void mtk_nfc_read_buf(struct mtd_info *mtd, u8 *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + buf[i] = mtk_nfc_read_byte(mtd); +} + +static void mtk_nfc_write_byte(struct mtd_info *mtd, u8 byte) +{ + struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd)); + u32 reg; + + reg = nfi_readl(nfc, NFI_STA) & NFI_FSM_MASK; + + if (reg != NFI_FSM_CUSTDATA) { + reg = nfi_readw(nfc, NFI_CNFG) | CNFG_BYTE_RW; + nfi_writew(nfc, reg, NFI_CNFG); + + reg = MTK_MAX_SECTOR << CON_SEC_SHIFT | CON_BWR; + nfi_writel(nfc, reg, NFI_CON); + + nfi_writew(nfc, STAR_EN, NFI_STRDATA); + } + + mtk_nfc_wait_ioready(nfc); + nfi_writeb(nfc, byte, NFI_DATAW); +} + +static void mtk_nfc_write_buf(struct mtd_info *mtd, const u8 *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + mtk_nfc_write_byte(mtd, buf[i]); +} + +static int mtk_nfc_sector_encode(struct nand_chip *chip, u8 *data) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + int size = chip->ecc.size + mtk_nand->fdm.reg_size; + + nfc->ecc_cfg.mode = ECC_DMA_MODE; + nfc->ecc_cfg.op = ECC_ENCODE; + + return mtk_ecc_encode(nfc->ecc, &nfc->ecc_cfg, data, size); +} + +static void mtk_nfc_no_bad_mark_swap(struct mtd_info *a, u8 *b, int c) +{ + /* nop */ +} + +static void mtk_nfc_bad_mark_swap(struct mtd_info *mtd, u8 *buf, int raw) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *nand = to_mtk_nand(chip); + u32 bad_pos = nand->bad_mark.pos; + + if (raw) + bad_pos += nand->bad_mark.sec * mtk_data_len(chip); + else + bad_pos += nand->bad_mark.sec * chip->ecc.size; + + swap(chip->oob_poi[0], buf[bad_pos]); +} + +static int mtk_nfc_format_subpage(struct mtd_info *mtd, u32 offset, + u32 len, const u8 *buf) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + u32 start, end; + int i, ret; + + start = offset / chip->ecc.size; + end = DIV_ROUND_UP(offset + len, chip->ecc.size); + + memset(nfc->buffer, 0xff, mtd->writesize + mtd->oobsize); + for (i = 0; i < chip->ecc.steps; i++) { + memcpy(mtk_data_ptr(chip, i), data_ptr(chip, buf, i), + chip->ecc.size); + + if (start > i || i >= end) + continue; + + if (i == mtk_nand->bad_mark.sec) + mtk_nand->bad_mark.bm_swap(mtd, nfc->buffer, 1); + + memcpy(mtk_oob_ptr(chip, i), oob_ptr(chip, i), fdm->reg_size); + + /* program the CRC back to the OOB */ + ret = mtk_nfc_sector_encode(chip, mtk_data_ptr(chip, i)); + if (ret < 0) + return ret; + } + + return 0; +} + +static void mtk_nfc_format_page(struct mtd_info *mtd, const u8 *buf) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + u32 i; + + memset(nfc->buffer, 0xff, mtd->writesize + mtd->oobsize); + for (i = 0; i < chip->ecc.steps; i++) { + if (buf) + memcpy(mtk_data_ptr(chip, i), data_ptr(chip, buf, i), + chip->ecc.size); + + if (i == mtk_nand->bad_mark.sec) + mtk_nand->bad_mark.bm_swap(mtd, nfc->buffer, 1); + + memcpy(mtk_oob_ptr(chip, i), oob_ptr(chip, i), fdm->reg_size); + } +} + +static inline void mtk_nfc_read_fdm(struct nand_chip *chip, u32 start, + u32 sectors) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + u32 vall, valm; + u8 *oobptr; + int i, j; + + for (i = 0; i < sectors; i++) { + oobptr = oob_ptr(chip, start + i); + vall = nfi_readl(nfc, NFI_FDML(i)); + valm = nfi_readl(nfc, NFI_FDMM(i)); + + for (j = 0; j < fdm->reg_size; j++) + oobptr[j] = (j >= 4 ? valm : vall) >> ((j % 4) * 8); + } +} + +static inline void mtk_nfc_write_fdm(struct nand_chip *chip) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + u32 vall, valm; + u8 *oobptr; + int i, j; + + for (i = 0; i < chip->ecc.steps; i++) { + oobptr = oob_ptr(chip, i); + vall = 0; + valm = 0; + for (j = 0; j < 8; j++) { + if (j < 4) + vall |= (j < fdm->reg_size ? oobptr[j] : 0xff) + << (j * 8); + else + valm |= (j < fdm->reg_size ? oobptr[j] : 0xff) + << ((j - 4) * 8); + } + nfi_writel(nfc, vall, NFI_FDML(i)); + nfi_writel(nfc, valm, NFI_FDMM(i)); + } +} + +static int mtk_nfc_do_write_page(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int page, int len) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct device *dev = nfc->dev; + dma_addr_t addr; + u32 reg; + int ret; + + addr = dma_map_single(dev, (void *)buf, len, DMA_TO_DEVICE); + ret = dma_mapping_error(nfc->dev, addr); + if (ret) { + dev_err(nfc->dev, "dma mapping error\n"); + return -EINVAL; + } + + reg = nfi_readw(nfc, NFI_CNFG) | CNFG_AHB | CNFG_DMA_BURST_EN; + nfi_writew(nfc, reg, NFI_CNFG); + + nfi_writel(nfc, chip->ecc.steps << CON_SEC_SHIFT, NFI_CON); + nfi_writel(nfc, lower_32_bits(addr), NFI_STRADDR); + nfi_writew(nfc, INTR_AHB_DONE_EN, NFI_INTR_EN); + + init_completion(&nfc->done); + + reg = nfi_readl(nfc, NFI_CON) | CON_BWR; + nfi_writel(nfc, reg, NFI_CON); + nfi_writew(nfc, STAR_EN, NFI_STRDATA); + + ret = wait_for_completion_timeout(&nfc->done, msecs_to_jiffies(500)); + if (!ret) { + dev_err(dev, "program ahb done timeout\n"); + nfi_writew(nfc, 0, NFI_INTR_EN); + ret = -ETIMEDOUT; + goto timeout; + } + + ret = readl_poll_timeout_atomic(nfc->regs + NFI_ADDRCNTR, reg, + (reg & CNTR_MASK) >= chip->ecc.steps, + 10, MTK_TIMEOUT); + if (ret) + dev_err(dev, "hwecc write timeout\n"); + +timeout: + + dma_unmap_single(nfc->dev, addr, len, DMA_TO_DEVICE); + nfi_writel(nfc, 0, NFI_CON); + + return ret; +} + +static int mtk_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int page, int raw) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + size_t len; + const u8 *bufpoi; + u32 reg; + int ret; + + if (!raw) { + /* OOB => FDM: from register, ECC: from HW */ + reg = nfi_readw(nfc, NFI_CNFG) | CNFG_AUTO_FMT_EN; + nfi_writew(nfc, reg | CNFG_HW_ECC_EN, NFI_CNFG); + + nfc->ecc_cfg.op = ECC_ENCODE; + nfc->ecc_cfg.mode = ECC_NFI_MODE; + ret = mtk_ecc_enable(nfc->ecc, &nfc->ecc_cfg); + if (ret) { + /* clear NFI config */ + reg = nfi_readw(nfc, NFI_CNFG); + reg &= ~(CNFG_AUTO_FMT_EN | CNFG_HW_ECC_EN); + nfi_writew(nfc, reg, NFI_CNFG); + + return ret; + } + + memcpy(nfc->buffer, buf, mtd->writesize); + mtk_nand->bad_mark.bm_swap(mtd, nfc->buffer, raw); + bufpoi = nfc->buffer; + + /* write OOB into the FDM registers (OOB area in MTK NAND) */ + mtk_nfc_write_fdm(chip); + } else { + bufpoi = buf; + } + + len = mtd->writesize + (raw ? mtd->oobsize : 0); + ret = mtk_nfc_do_write_page(mtd, chip, bufpoi, page, len); + + if (!raw) + mtk_ecc_disable(nfc->ecc); + + return ret; +} + +static int mtk_nfc_write_page_hwecc(struct mtd_info *mtd, + struct nand_chip *chip, const u8 *buf, + int oob_on, int page) +{ + return mtk_nfc_write_page(mtd, chip, buf, page, 0); +} + +static int mtk_nfc_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + const u8 *buf, int oob_on, int pg) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + + mtk_nfc_format_page(mtd, buf); + return mtk_nfc_write_page(mtd, chip, nfc->buffer, pg, 1); +} + +static int mtk_nfc_write_subpage_hwecc(struct mtd_info *mtd, + struct nand_chip *chip, u32 offset, + u32 data_len, const u8 *buf, + int oob_on, int page) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + int ret; + + ret = mtk_nfc_format_subpage(mtd, offset, data_len, buf); + if (ret < 0) + return ret; + + /* use the data in the private buffer (now with FDM and CRC) */ + return mtk_nfc_write_page(mtd, chip, nfc->buffer, page, 1); +} + +static int mtk_nfc_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + int ret; + + chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); + + ret = mtk_nfc_write_page_raw(mtd, chip, NULL, 1, page); + if (ret < 0) + return -EIO; + + chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + ret = chip->waitfunc(mtd, chip); + + return ret & NAND_STATUS_FAIL ? -EIO : 0; +} + +static int mtk_nfc_update_ecc_stats(struct mtd_info *mtd, u8 *buf, u32 sectors) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_ecc_stats stats; + int rc, i; + + rc = nfi_readl(nfc, NFI_STA) & STA_EMP_PAGE; + if (rc) { + memset(buf, 0xff, sectors * chip->ecc.size); + for (i = 0; i < sectors; i++) + memset(oob_ptr(chip, i), 0xff, mtk_nand->fdm.reg_size); + return 0; + } + + mtk_ecc_get_stats(nfc->ecc, &stats, sectors); + mtd->ecc_stats.corrected += stats.corrected; + mtd->ecc_stats.failed += stats.failed; + + return stats.bitflips; +} + +static int mtk_nfc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, + u32 data_offs, u32 readlen, + u8 *bufpoi, int page, int raw) +{ + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + u32 spare = mtk_nand->spare_per_sector; + u32 column, sectors, start, end, reg; + dma_addr_t addr; + int bitflips; + size_t len; + u8 *buf; + int rc; + + start = data_offs / chip->ecc.size; + end = DIV_ROUND_UP(data_offs + readlen, chip->ecc.size); + + sectors = end - start; + column = start * (chip->ecc.size + spare); + + len = sectors * chip->ecc.size + (raw ? sectors * spare : 0); + buf = bufpoi + start * chip->ecc.size; + + if (column != 0) + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, column, -1); + + addr = dma_map_single(nfc->dev, buf, len, DMA_FROM_DEVICE); + rc = dma_mapping_error(nfc->dev, addr); + if (rc) { + dev_err(nfc->dev, "dma mapping error\n"); + + return -EINVAL; + } + + reg = nfi_readw(nfc, NFI_CNFG); + reg |= CNFG_READ_EN | CNFG_DMA_BURST_EN | CNFG_AHB; + if (!raw) { + reg |= CNFG_AUTO_FMT_EN | CNFG_HW_ECC_EN; + nfi_writew(nfc, reg, NFI_CNFG); + + nfc->ecc_cfg.mode = ECC_NFI_MODE; + nfc->ecc_cfg.sectors = sectors; + nfc->ecc_cfg.op = ECC_DECODE; + rc = mtk_ecc_enable(nfc->ecc, &nfc->ecc_cfg); + if (rc) { + dev_err(nfc->dev, "ecc enable\n"); + /* clear NFI_CNFG */ + reg &= ~(CNFG_DMA_BURST_EN | CNFG_AHB | CNFG_READ_EN | + CNFG_AUTO_FMT_EN | CNFG_HW_ECC_EN); + nfi_writew(nfc, reg, NFI_CNFG); + dma_unmap_single(nfc->dev, addr, len, DMA_FROM_DEVICE); + + return rc; + } + } else { + nfi_writew(nfc, reg, NFI_CNFG); + } + + nfi_writel(nfc, sectors << CON_SEC_SHIFT, NFI_CON); + nfi_writew(nfc, INTR_AHB_DONE_EN, NFI_INTR_EN); + nfi_writel(nfc, lower_32_bits(addr), NFI_STRADDR); + + init_completion(&nfc->done); + reg = nfi_readl(nfc, NFI_CON) | CON_BRD; + nfi_writel(nfc, reg, NFI_CON); + nfi_writew(nfc, STAR_EN, NFI_STRDATA); + + rc = wait_for_completion_timeout(&nfc->done, msecs_to_jiffies(500)); + if (!rc) + dev_warn(nfc->dev, "read ahb/dma done timeout\n"); + + rc = readl_poll_timeout_atomic(nfc->regs + NFI_BYTELEN, reg, + (reg & CNTR_MASK) >= sectors, 10, + MTK_TIMEOUT); + if (rc < 0) { + dev_err(nfc->dev, "subpage done timeout\n"); + bitflips = -EIO; + } else { + bitflips = 0; + if (!raw) { + rc = mtk_ecc_wait_done(nfc->ecc, ECC_DECODE); + bitflips = rc < 0 ? -ETIMEDOUT : + mtk_nfc_update_ecc_stats(mtd, buf, sectors); + mtk_nfc_read_fdm(chip, start, sectors); + } + } + + dma_unmap_single(nfc->dev, addr, len, DMA_FROM_DEVICE); + + if (raw) + goto done; + + mtk_ecc_disable(nfc->ecc); + + if (clamp(mtk_nand->bad_mark.sec, start, end) == mtk_nand->bad_mark.sec) + mtk_nand->bad_mark.bm_swap(mtd, bufpoi, raw); +done: + nfi_writel(nfc, 0, NFI_CON); + + return bitflips; +} + +static int mtk_nfc_read_subpage_hwecc(struct mtd_info *mtd, + struct nand_chip *chip, u32 off, + u32 len, u8 *p, int pg) +{ + return mtk_nfc_read_subpage(mtd, chip, off, len, p, pg, 0); +} + +static int mtk_nfc_read_page_hwecc(struct mtd_info *mtd, + struct nand_chip *chip, u8 *p, + int oob_on, int pg) +{ + return mtk_nfc_read_subpage(mtd, chip, 0, mtd->writesize, p, pg, 0); +} + +static int mtk_nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + u8 *buf, int oob_on, int page) +{ + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc *nfc = nand_get_controller_data(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + int i, ret; + + memset(nfc->buffer, 0xff, mtd->writesize + mtd->oobsize); + ret = mtk_nfc_read_subpage(mtd, chip, 0, mtd->writesize, nfc->buffer, + page, 1); + if (ret < 0) + return ret; + + for (i = 0; i < chip->ecc.steps; i++) { + memcpy(oob_ptr(chip, i), mtk_oob_ptr(chip, i), fdm->reg_size); + + if (i == mtk_nand->bad_mark.sec) + mtk_nand->bad_mark.bm_swap(mtd, nfc->buffer, 1); + + if (buf) + memcpy(data_ptr(chip, buf, i), mtk_data_ptr(chip, i), + chip->ecc.size); + } + + return ret; +} + +static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, + int page) +{ + chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + + return mtk_nfc_read_page_raw(mtd, chip, NULL, 1, page); +} + +static inline void mtk_nfc_hw_init(struct mtk_nfc *nfc) +{ + /* + * ACCON: access timing control register + * ------------------------------------- + * 31:28: minimum required time for CS post pulling down after accessing + * the device + * 27:22: minimum required time for CS pre pulling down before accessing + * the device + * 21:16: minimum required time from NCEB low to NREB low + * 15:12: minimum required time from NWEB high to NREB low. + * 11:08: write enable hold time + * 07:04: write wait states + * 03:00: read wait states + */ + nfi_writel(nfc, 0x10804211, NFI_ACCCON); + + /* + * CNRNB: nand ready/busy register + * ------------------------------- + * 7:4: timeout register for polling the NAND busy/ready signal + * 0 : poll the status of the busy/ready signal after [7:4]*16 cycles. + */ + nfi_writew(nfc, 0xf1, NFI_CNRNB); + nfi_writew(nfc, PAGEFMT_8K_16K, NFI_PAGEFMT); + + mtk_nfc_hw_reset(nfc); + + nfi_readl(nfc, NFI_INTR_STA); + nfi_writel(nfc, 0, NFI_INTR_EN); +} + +static irqreturn_t mtk_nfc_irq(int irq, void *id) +{ + struct mtk_nfc *nfc = id; + u16 sta, ien; + + sta = nfi_readw(nfc, NFI_INTR_STA); + ien = nfi_readw(nfc, NFI_INTR_EN); + + if (!(sta & ien)) + return IRQ_NONE; + + nfi_writew(nfc, ~sta & ien, NFI_INTR_EN); + complete(&nfc->done); + + return IRQ_HANDLED; +} + +static int mtk_nfc_enable_clk(struct device *dev, struct mtk_nfc_clk *clk) +{ + int ret; + + ret = clk_prepare_enable(clk->nfi_clk); + if (ret) { + dev_err(dev, "failed to enable nfi clk\n"); + return ret; + } + + ret = clk_prepare_enable(clk->pad_clk); + if (ret) { + dev_err(dev, "failed to enable pad clk\n"); + clk_disable_unprepare(clk->nfi_clk); + return ret; + } + + return 0; +} + +static void mtk_nfc_disable_clk(struct mtk_nfc_clk *clk) +{ + clk_disable_unprepare(clk->nfi_clk); + clk_disable_unprepare(clk->pad_clk); +} + +static int mtk_nfc_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oob_region) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + struct mtk_nfc_fdm *fdm = &mtk_nand->fdm; + u32 eccsteps; + + eccsteps = mtd->writesize / chip->ecc.size; + + if (section >= eccsteps) + return -ERANGE; + + oob_region->length = fdm->reg_size - fdm->ecc_size; + oob_region->offset = section * fdm->reg_size + fdm->ecc_size; + + return 0; +} + +static int mtk_nfc_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oob_region) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip); + u32 eccsteps; + + if (section) + return -ERANGE; + + eccsteps = mtd->writesize / chip->ecc.size; + oob_region->offset = mtk_nand->fdm.reg_size * eccsteps; + oob_region->length = mtd->oobsize - oob_region->offset; + + return 0; +} + +static const struct mtd_ooblayout_ops mtk_nfc_ooblayout_ops = { + .free = mtk_nfc_ooblayout_free, + .ecc = mtk_nfc_ooblayout_ecc, +}; + +static void mtk_nfc_set_fdm(struct mtk_nfc_fdm *fdm, struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + struct mtk_nfc_nand_chip *chip = to_mtk_nand(nand); + u32 ecc_bytes; + + ecc_bytes = DIV_ROUND_UP(nand->ecc.strength * ECC_PARITY_BITS, 8); + + fdm->reg_size = chip->spare_per_sector - ecc_bytes; + if (fdm->reg_size > NFI_FDM_MAX_SIZE) + fdm->reg_size = NFI_FDM_MAX_SIZE; + + /* bad block mark storage */ + fdm->ecc_size = 1; +} + +static void mtk_nfc_set_bad_mark_ctl(struct mtk_nfc_bad_mark_ctl *bm_ctl, + struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + + if (mtd->writesize == 512) { + bm_ctl->bm_swap = mtk_nfc_no_bad_mark_swap; + } else { + bm_ctl->bm_swap = mtk_nfc_bad_mark_swap; + bm_ctl->sec = mtd->writesize / mtk_data_len(nand); + bm_ctl->pos = mtd->writesize % mtk_data_len(nand); + } +} + +static void mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + u32 spare[] = {16, 26, 27, 28, 32, 36, 40, 44, + 48, 49, 50, 51, 52, 62, 63, 64}; + u32 eccsteps, i; + + eccsteps = mtd->writesize / nand->ecc.size; + *sps = mtd->oobsize / eccsteps; + + if (nand->ecc.size == 1024) + *sps >>= 1; + + for (i = 0; i < ARRAY_SIZE(spare); i++) { + if (*sps <= spare[i]) { + if (!i) + *sps = spare[i]; + else if (*sps != spare[i]) + *sps = spare[i - 1]; + break; + } + } + + if (i >= ARRAY_SIZE(spare)) + *sps = spare[ARRAY_SIZE(spare) - 1]; + + if (nand->ecc.size == 1024) + *sps <<= 1; +} + +static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + u32 spare; + int free; + + /* support only ecc hw mode */ + if (nand->ecc.mode != NAND_ECC_HW) { + dev_err(dev, "ecc.mode not supported\n"); + return -EINVAL; + } + + /* if optional dt settings not present */ + if (!nand->ecc.size || !nand->ecc.strength) { + /* use datasheet requirements */ + nand->ecc.strength = nand->ecc_strength_ds; + nand->ecc.size = nand->ecc_step_ds; + + /* + * align eccstrength and eccsize + * this controller only supports 512 and 1024 sizes + */ + if (nand->ecc.size < 1024) { + if (mtd->writesize > 512) { + nand->ecc.size = 1024; + nand->ecc.strength <<= 1; + } else { + nand->ecc.size = 512; + } + } else { + nand->ecc.size = 1024; + } + + mtk_nfc_set_spare_per_sector(&spare, mtd); + + /* calculate oob bytes except ecc parity data */ + free = ((nand->ecc.strength * ECC_PARITY_BITS) + 7) >> 3; + free = spare - free; + + /* + * enhance ecc strength if oob left is bigger than max FDM size + * or reduce ecc strength if oob size is not enough for ecc + * parity data. + */ + if (free > NFI_FDM_MAX_SIZE) { + spare -= NFI_FDM_MAX_SIZE; + nand->ecc.strength = (spare << 3) / ECC_PARITY_BITS; + } else if (free < 0) { + spare -= NFI_FDM_MIN_SIZE; + nand->ecc.strength = (spare << 3) / ECC_PARITY_BITS; + } + } + + mtk_ecc_adjust_strength(&nand->ecc.strength); + + dev_info(dev, "eccsize %d eccstrength %d\n", + nand->ecc.size, nand->ecc.strength); + + return 0; +} + +static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, + struct device_node *np) +{ + struct mtk_nfc_nand_chip *chip; + struct nand_chip *nand; + struct mtd_info *mtd; + int nsels, len; + u32 tmp; + int ret; + int i; + + if (!of_get_property(np, "reg", &nsels)) + return -ENODEV; + + nsels /= sizeof(u32); + if (!nsels || nsels > MTK_NAND_MAX_NSELS) { + dev_err(dev, "invalid reg property size %d\n", nsels); + return -EINVAL; + } + + chip = devm_kzalloc(dev, sizeof(*chip) + nsels * sizeof(u8), + GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->nsels = nsels; + for (i = 0; i < nsels; i++) { + ret = of_property_read_u32_index(np, "reg", i, &tmp); + if (ret) { + dev_err(dev, "reg property failure : %d\n", ret); + return ret; + } + chip->sels[i] = tmp; + } + + nand = &chip->nand; + nand->controller = &nfc->controller; + + nand_set_flash_node(nand, np); + nand_set_controller_data(nand, nfc); + + nand->options |= NAND_USE_BOUNCE_BUFFER | NAND_SUBPAGE_READ; + nand->dev_ready = mtk_nfc_dev_ready; + nand->select_chip = mtk_nfc_select_chip; + nand->write_byte = mtk_nfc_write_byte; + nand->write_buf = mtk_nfc_write_buf; + nand->read_byte = mtk_nfc_read_byte; + nand->read_buf = mtk_nfc_read_buf; + nand->cmd_ctrl = mtk_nfc_cmd_ctrl; + + /* set default mode in case dt entry is missing */ + nand->ecc.mode = NAND_ECC_HW; + + nand->ecc.write_subpage = mtk_nfc_write_subpage_hwecc; + nand->ecc.write_page_raw = mtk_nfc_write_page_raw; + nand->ecc.write_page = mtk_nfc_write_page_hwecc; + nand->ecc.write_oob_raw = mtk_nfc_write_oob_std; + nand->ecc.write_oob = mtk_nfc_write_oob_std; + + nand->ecc.read_subpage = mtk_nfc_read_subpage_hwecc; + nand->ecc.read_page_raw = mtk_nfc_read_page_raw; + nand->ecc.read_page = mtk_nfc_read_page_hwecc; + nand->ecc.read_oob_raw = mtk_nfc_read_oob_std; + nand->ecc.read_oob = mtk_nfc_read_oob_std; + + mtd = nand_to_mtd(nand); + mtd->owner = THIS_MODULE; + mtd->dev.parent = dev; + mtd->name = MTK_NAME; + mtd_set_ooblayout(mtd, &mtk_nfc_ooblayout_ops); + + mtk_nfc_hw_init(nfc); + + ret = nand_scan_ident(mtd, nsels, NULL); + if (ret) + return -ENODEV; + + /* store bbt magic in page, cause OOB is not protected */ + if (nand->bbt_options & NAND_BBT_USE_FLASH) + nand->bbt_options |= NAND_BBT_NO_OOB; + + ret = mtk_nfc_ecc_init(dev, mtd); + if (ret) + return -EINVAL; + + if (nand->options & NAND_BUSWIDTH_16) { + dev_err(dev, "16bits buswidth not supported"); + return -EINVAL; + } + + mtk_nfc_set_spare_per_sector(&chip->spare_per_sector, mtd); + mtk_nfc_set_fdm(&chip->fdm, mtd); + mtk_nfc_set_bad_mark_ctl(&chip->bad_mark, mtd); + + len = mtd->writesize + mtd->oobsize; + nfc->buffer = devm_kzalloc(dev, len, GFP_KERNEL); + if (!nfc->buffer) + return -ENOMEM; + + ret = nand_scan_tail(mtd); + if (ret) + return -ENODEV; + + ret = mtd_device_parse_register(mtd, NULL, NULL, NULL, 0); + if (ret) { + dev_err(dev, "mtd parse partition error\n"); + nand_release(mtd); + return ret; + } + + list_add_tail(&chip->node, &nfc->chips); + + return 0; +} + +static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc) +{ + struct device_node *np = dev->of_node; + struct device_node *nand_np; + int ret; + + for_each_child_of_node(np, nand_np) { + ret = mtk_nfc_nand_chip_init(dev, nfc, nand_np); + if (ret) { + of_node_put(nand_np); + return ret; + } + } + + return 0; +} + +static int mtk_nfc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct mtk_nfc *nfc; + struct resource *res; + int ret, irq; + + nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL); + if (!nfc) + return -ENOMEM; + + spin_lock_init(&nfc->controller.lock); + init_waitqueue_head(&nfc->controller.wq); + INIT_LIST_HEAD(&nfc->chips); + + /* probe defer if not ready */ + nfc->ecc = of_mtk_ecc_get(np); + if (IS_ERR(nfc->ecc)) + return PTR_ERR(nfc->ecc); + else if (!nfc->ecc) + return -ENODEV; + + nfc->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + nfc->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(nfc->regs)) { + ret = PTR_ERR(nfc->regs); + dev_err(dev, "no nfi base\n"); + goto release_ecc; + } + + nfc->clk.nfi_clk = devm_clk_get(dev, "nfi_clk"); + if (IS_ERR(nfc->clk.nfi_clk)) { + dev_err(dev, "no clk\n"); + ret = PTR_ERR(nfc->clk.nfi_clk); + goto release_ecc; + } + + nfc->clk.pad_clk = devm_clk_get(dev, "pad_clk"); + if (IS_ERR(nfc->clk.pad_clk)) { + dev_err(dev, "no pad clk\n"); + ret = PTR_ERR(nfc->clk.pad_clk); + goto release_ecc; + } + + ret = mtk_nfc_enable_clk(dev, &nfc->clk); + if (ret) + goto release_ecc; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "no nfi irq resource\n"); + ret = -EINVAL; + goto clk_disable; + } + + ret = devm_request_irq(dev, irq, mtk_nfc_irq, 0x0, "mtk-nand", nfc); + if (ret) { + dev_err(dev, "failed to request nfi irq\n"); + goto clk_disable; + } + + ret = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "failed to set dma mask\n"); + goto clk_disable; + } + + platform_set_drvdata(pdev, nfc); + + ret = mtk_nfc_nand_chips_init(dev, nfc); + if (ret) { + dev_err(dev, "failed to init nand chips\n"); + goto clk_disable; + } + + return 0; + +clk_disable: + mtk_nfc_disable_clk(&nfc->clk); + +release_ecc: + mtk_ecc_release(nfc->ecc); + + return ret; +} + +static int mtk_nfc_remove(struct platform_device *pdev) +{ + struct mtk_nfc *nfc = platform_get_drvdata(pdev); + struct mtk_nfc_nand_chip *chip; + + while (!list_empty(&nfc->chips)) { + chip = list_first_entry(&nfc->chips, struct mtk_nfc_nand_chip, + node); + nand_release(nand_to_mtd(&chip->nand)); + list_del(&chip->node); + } + + mtk_ecc_release(nfc->ecc); + mtk_nfc_disable_clk(&nfc->clk); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int mtk_nfc_suspend(struct device *dev) +{ + struct mtk_nfc *nfc = dev_get_drvdata(dev); + + mtk_nfc_disable_clk(&nfc->clk); + + return 0; +} + +static int mtk_nfc_resume(struct device *dev) +{ + struct mtk_nfc *nfc = dev_get_drvdata(dev); + struct mtk_nfc_nand_chip *chip; + struct nand_chip *nand; + struct mtd_info *mtd; + int ret; + u32 i; + + udelay(200); + + ret = mtk_nfc_enable_clk(dev, &nfc->clk); + if (ret) + return ret; + + mtk_nfc_hw_init(nfc); + + /* reset NAND chip if VCC was powered off */ + list_for_each_entry(chip, &nfc->chips, node) { + nand = &chip->nand; + mtd = nand_to_mtd(nand); + for (i = 0; i < chip->nsels; i++) { + nand->select_chip(mtd, i); + nand->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + } + } + + return 0; +} + +static SIMPLE_DEV_PM_OPS(mtk_nfc_pm_ops, mtk_nfc_suspend, mtk_nfc_resume); +#endif + +static const struct of_device_id mtk_nfc_id_table[] = { + { .compatible = "mediatek,mt2701-nfc" }, + {} +}; +MODULE_DEVICE_TABLE(of, mtk_nfc_id_table); + +static struct platform_driver mtk_nfc_driver = { + .probe = mtk_nfc_probe, + .remove = mtk_nfc_remove, + .driver = { + .name = MTK_NAME, + .of_match_table = mtk_nfc_id_table, +#ifdef CONFIG_PM_SLEEP + .pm = &mtk_nfc_pm_ops, +#endif + }, +}; + +module_platform_driver(mtk_nfc_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Xiaolei Li <xiaolei.li@mediatek.com>"); +MODULE_DESCRIPTION("MTK Nand Flash Controller Driver"); diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 0b0dc29d2af7..77533f7f2429 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2610,7 +2610,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, int cached = writelen > bytes && page != blockmask; uint8_t *wbuf = buf; int use_bufpoi; - int part_pagewr = (column || writelen < (mtd->writesize - 1)); + int part_pagewr = (column || writelen < mtd->writesize); if (part_pagewr) use_bufpoi = 1; diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index ccc05f5b2695..2af9869a115e 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -168,6 +168,7 @@ struct nand_flash_dev nand_flash_ids[] = { /* Manufacturer IDs */ struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_TOSHIBA, "Toshiba"}, + {NAND_MFR_ESMT, "ESMT"}, {NAND_MFR_SAMSUNG, "Samsung"}, {NAND_MFR_FUJITSU, "Fujitsu"}, {NAND_MFR_NATIONAL, "National"}, diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index a136da8df6fe..a59361c36f40 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -118,8 +118,6 @@ #define PREFETCH_STATUS_FIFO_CNT(val) ((val >> 24) & 0x7F) #define STATUS_BUFF_EMPTY 0x00000001 -#define OMAP24XX_DMA_GPMC 4 - #define SECTOR_BYTES 512 /* 4 bit padding to make byte aligned, 56 = 52 + 4 */ #define BCH4_BIT_PAD 4 @@ -1811,7 +1809,6 @@ static int omap_nand_probe(struct platform_device *pdev) struct nand_chip *nand_chip; int err; dma_cap_mask_t mask; - unsigned sig; struct resource *res; struct device *dev = &pdev->dev; int min_oobbytes = BADBLOCK_MARKER_LENGTH; @@ -1924,11 +1921,11 @@ static int omap_nand_probe(struct platform_device *pdev) case NAND_OMAP_PREFETCH_DMA: dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - sig = OMAP24XX_DMA_GPMC; - info->dma = dma_request_channel(mask, omap_dma_filter_fn, &sig); - if (!info->dma) { + info->dma = dma_request_chan(pdev->dev.parent, "rxtx"); + + if (IS_ERR(info->dma)) { dev_err(&pdev->dev, "DMA engine request failed\n"); - err = -ENXIO; + err = PTR_ERR(info->dma); goto return_error; } else { struct dma_slave_config cfg; diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index a83a690688b4..e414b31b71c1 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -39,6 +39,7 @@ #include <linux/gpio.h> #include <linux/interrupt.h> #include <linux/iopoll.h> +#include <linux/reset.h> #define NFC_REG_CTL 0x0000 #define NFC_REG_ST 0x0004 @@ -153,6 +154,7 @@ /* define bit use in NFC_ECC_ST */ #define NFC_ECC_ERR(x) BIT(x) +#define NFC_ECC_ERR_MSK GENMASK(15, 0) #define NFC_ECC_PAT_FOUND(x) BIT(x + 16) #define NFC_ECC_ERR_CNT(b, x) (((x) >> (((b) % 4) * 8)) & 0xff) @@ -269,10 +271,12 @@ struct sunxi_nfc { void __iomem *regs; struct clk *ahb_clk; struct clk *mod_clk; + struct reset_control *reset; unsigned long assigned_cs; unsigned long clk_rate; struct list_head chips; struct completion complete; + struct dma_chan *dmac; }; static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl) @@ -365,6 +369,67 @@ static int sunxi_nfc_rst(struct sunxi_nfc *nfc) return ret; } +static int sunxi_nfc_dma_op_prepare(struct mtd_info *mtd, const void *buf, + int chunksize, int nchunks, + enum dma_data_direction ddir, + struct scatterlist *sg) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); + struct dma_async_tx_descriptor *dmad; + enum dma_transfer_direction tdir; + dma_cookie_t dmat; + int ret; + + if (ddir == DMA_FROM_DEVICE) + tdir = DMA_DEV_TO_MEM; + else + tdir = DMA_MEM_TO_DEV; + + sg_init_one(sg, buf, nchunks * chunksize); + ret = dma_map_sg(nfc->dev, sg, 1, ddir); + if (!ret) + return -ENOMEM; + + dmad = dmaengine_prep_slave_sg(nfc->dmac, sg, 1, tdir, DMA_CTRL_ACK); + if (!dmad) { + ret = -EINVAL; + goto err_unmap_buf; + } + + writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD, + nfc->regs + NFC_REG_CTL); + writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM); + writel(chunksize, nfc->regs + NFC_REG_CNT); + dmat = dmaengine_submit(dmad); + + ret = dma_submit_error(dmat); + if (ret) + goto err_clr_dma_flag; + + return 0; + +err_clr_dma_flag: + writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD, + nfc->regs + NFC_REG_CTL); + +err_unmap_buf: + dma_unmap_sg(nfc->dev, sg, 1, ddir); + return ret; +} + +static void sunxi_nfc_dma_op_cleanup(struct mtd_info *mtd, + enum dma_data_direction ddir, + struct scatterlist *sg) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); + + dma_unmap_sg(nfc->dev, sg, 1, ddir); + writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD, + nfc->regs + NFC_REG_CTL); +} + static int sunxi_nfc_dev_ready(struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); @@ -822,17 +887,15 @@ static void sunxi_nfc_hw_ecc_update_stats(struct mtd_info *mtd, } static int sunxi_nfc_hw_ecc_correct(struct mtd_info *mtd, u8 *data, u8 *oob, - int step, bool *erased) + int step, u32 status, bool *erased) { struct nand_chip *nand = mtd_to_nand(mtd); struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); struct nand_ecc_ctrl *ecc = &nand->ecc; - u32 status, tmp; + u32 tmp; *erased = false; - status = readl(nfc->regs + NFC_REG_ECC_ST); - if (status & NFC_ECC_ERR(step)) return -EBADMSG; @@ -898,6 +961,7 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd, *cur_off = oob_off + ecc->bytes + 4; ret = sunxi_nfc_hw_ecc_correct(mtd, data, oob_required ? oob : NULL, 0, + readl(nfc->regs + NFC_REG_ECC_ST), &erased); if (erased) return 1; @@ -967,6 +1031,130 @@ static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd, *cur_off = mtd->oobsize + mtd->writesize; } +static int sunxi_nfc_hw_ecc_read_chunks_dma(struct mtd_info *mtd, uint8_t *buf, + int oob_required, int page, + int nchunks) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + bool randomized = nand->options & NAND_NEED_SCRAMBLING; + struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); + struct nand_ecc_ctrl *ecc = &nand->ecc; + unsigned int max_bitflips = 0; + int ret, i, raw_mode = 0; + struct scatterlist sg; + u32 status; + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + ret = sunxi_nfc_dma_op_prepare(mtd, buf, ecc->size, nchunks, + DMA_FROM_DEVICE, &sg); + if (ret) + return ret; + + sunxi_nfc_hw_ecc_enable(mtd); + sunxi_nfc_randomizer_config(mtd, page, false); + sunxi_nfc_randomizer_enable(mtd); + + writel((NAND_CMD_RNDOUTSTART << 16) | (NAND_CMD_RNDOUT << 8) | + NAND_CMD_READSTART, nfc->regs + NFC_REG_RCMD_SET); + + dma_async_issue_pending(nfc->dmac); + + writel(NFC_PAGE_OP | NFC_DATA_SWAP_METHOD | NFC_DATA_TRANS, + nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_events(nfc, NFC_CMD_INT_FLAG, true, 0); + if (ret) + dmaengine_terminate_all(nfc->dmac); + + sunxi_nfc_randomizer_disable(mtd); + sunxi_nfc_hw_ecc_disable(mtd); + + sunxi_nfc_dma_op_cleanup(mtd, DMA_FROM_DEVICE, &sg); + + if (ret) + return ret; + + status = readl(nfc->regs + NFC_REG_ECC_ST); + + for (i = 0; i < nchunks; i++) { + int data_off = i * ecc->size; + int oob_off = i * (ecc->bytes + 4); + u8 *data = buf + data_off; + u8 *oob = nand->oob_poi + oob_off; + bool erased; + + ret = sunxi_nfc_hw_ecc_correct(mtd, randomized ? data : NULL, + oob_required ? oob : NULL, + i, status, &erased); + + /* ECC errors are handled in the second loop. */ + if (ret < 0) + continue; + + if (oob_required && !erased) { + /* TODO: use DMA to retrieve OOB */ + nand->cmdfunc(mtd, NAND_CMD_RNDOUT, + mtd->writesize + oob_off, -1); + nand->read_buf(mtd, oob, ecc->bytes + 4); + + sunxi_nfc_hw_ecc_get_prot_oob_bytes(mtd, oob, i, + !i, page); + } + + if (erased) + raw_mode = 1; + + sunxi_nfc_hw_ecc_update_stats(mtd, &max_bitflips, ret); + } + + if (status & NFC_ECC_ERR_MSK) { + for (i = 0; i < nchunks; i++) { + int data_off = i * ecc->size; + int oob_off = i * (ecc->bytes + 4); + u8 *data = buf + data_off; + u8 *oob = nand->oob_poi + oob_off; + + if (!(status & NFC_ECC_ERR(i))) + continue; + + /* + * Re-read the data with the randomizer disabled to + * identify bitflips in erased pages. + */ + if (randomized) { + /* TODO: use DMA to read page in raw mode */ + nand->cmdfunc(mtd, NAND_CMD_RNDOUT, + data_off, -1); + nand->read_buf(mtd, data, ecc->size); + } + + /* TODO: use DMA to retrieve OOB */ + nand->cmdfunc(mtd, NAND_CMD_RNDOUT, + mtd->writesize + oob_off, -1); + nand->read_buf(mtd, oob, ecc->bytes + 4); + + ret = nand_check_erased_ecc_chunk(data, ecc->size, + oob, ecc->bytes + 4, + NULL, 0, + ecc->strength); + if (ret >= 0) + raw_mode = 1; + + sunxi_nfc_hw_ecc_update_stats(mtd, &max_bitflips, ret); + } + } + + if (oob_required) + sunxi_nfc_hw_ecc_read_extra_oob(mtd, nand->oob_poi, + NULL, !raw_mode, + page); + + return max_bitflips; +} + static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd, const u8 *data, int data_off, const u8 *oob, int oob_off, @@ -1065,6 +1253,23 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd, return max_bitflips; } +static int sunxi_nfc_hw_ecc_read_page_dma(struct mtd_info *mtd, + struct nand_chip *chip, u8 *buf, + int oob_required, int page) +{ + int ret; + + ret = sunxi_nfc_hw_ecc_read_chunks_dma(mtd, buf, oob_required, page, + chip->ecc.steps); + if (ret >= 0) + return ret; + + /* Fallback to PIO mode */ + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, 0, -1); + + return sunxi_nfc_hw_ecc_read_page(mtd, chip, buf, oob_required, page); +} + static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, u32 data_offs, u32 readlen, @@ -1098,6 +1303,25 @@ static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd, return max_bitflips; } +static int sunxi_nfc_hw_ecc_read_subpage_dma(struct mtd_info *mtd, + struct nand_chip *chip, + u32 data_offs, u32 readlen, + u8 *buf, int page) +{ + int nchunks = DIV_ROUND_UP(data_offs + readlen, chip->ecc.size); + int ret; + + ret = sunxi_nfc_hw_ecc_read_chunks_dma(mtd, buf, false, page, nchunks); + if (ret >= 0) + return ret; + + /* Fallback to PIO mode */ + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, 0, -1); + + return sunxi_nfc_hw_ecc_read_subpage(mtd, chip, data_offs, readlen, + buf, page); +} + static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, @@ -1130,6 +1354,99 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, return 0; } +static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd, + struct nand_chip *chip, + u32 data_offs, u32 data_len, + const u8 *buf, int oob_required, + int page) +{ + struct nand_ecc_ctrl *ecc = &chip->ecc; + int ret, i, cur_off = 0; + + sunxi_nfc_hw_ecc_enable(mtd); + + for (i = data_offs / ecc->size; + i < DIV_ROUND_UP(data_offs + data_len, ecc->size); i++) { + int data_off = i * ecc->size; + int oob_off = i * (ecc->bytes + 4); + const u8 *data = buf + data_off; + const u8 *oob = chip->oob_poi + oob_off; + + ret = sunxi_nfc_hw_ecc_write_chunk(mtd, data, data_off, oob, + oob_off + mtd->writesize, + &cur_off, !i, page); + if (ret) + return ret; + } + + sunxi_nfc_hw_ecc_disable(mtd); + + return 0; +} + +static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, + struct nand_chip *chip, + const u8 *buf, + int oob_required, + int page) +{ + struct nand_chip *nand = mtd_to_nand(mtd); + struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); + struct nand_ecc_ctrl *ecc = &nand->ecc; + struct scatterlist sg; + int ret, i; + + ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); + if (ret) + return ret; + + ret = sunxi_nfc_dma_op_prepare(mtd, buf, ecc->size, ecc->steps, + DMA_TO_DEVICE, &sg); + if (ret) + goto pio_fallback; + + for (i = 0; i < ecc->steps; i++) { + const u8 *oob = nand->oob_poi + (i * (ecc->bytes + 4)); + + sunxi_nfc_hw_ecc_set_prot_oob_bytes(mtd, oob, i, !i, page); + } + + sunxi_nfc_hw_ecc_enable(mtd); + sunxi_nfc_randomizer_config(mtd, page, false); + sunxi_nfc_randomizer_enable(mtd); + + writel((NAND_CMD_RNDIN << 8) | NAND_CMD_PAGEPROG, + nfc->regs + NFC_REG_RCMD_SET); + + dma_async_issue_pending(nfc->dmac); + + writel(NFC_PAGE_OP | NFC_DATA_SWAP_METHOD | + NFC_DATA_TRANS | NFC_ACCESS_DIR, + nfc->regs + NFC_REG_CMD); + + ret = sunxi_nfc_wait_events(nfc, NFC_CMD_INT_FLAG, true, 0); + if (ret) + dmaengine_terminate_all(nfc->dmac); + + sunxi_nfc_randomizer_disable(mtd); + sunxi_nfc_hw_ecc_disable(mtd); + + sunxi_nfc_dma_op_cleanup(mtd, DMA_TO_DEVICE, &sg); + + if (ret) + return ret; + + if (oob_required || (chip->options & NAND_NEED_SCRAMBLING)) + /* TODO: use DMA to transfer extra OOB bytes ? */ + sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, + NULL, page); + + return 0; + +pio_fallback: + return sunxi_nfc_hw_ecc_write_page(mtd, chip, buf, oob_required, page); +} + static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, @@ -1497,10 +1814,19 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, int ret; int i; + if (ecc->size != 512 && ecc->size != 1024) + return -EINVAL; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + /* Prefer 1k ECC chunk over 512 ones */ + if (ecc->size == 512 && mtd->writesize > 512) { + ecc->size = 1024; + ecc->strength *= 2; + } + /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { if (ecc->strength <= strengths[i]) @@ -1550,14 +1876,28 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc, struct device_node *np) { + struct nand_chip *nand = mtd_to_nand(mtd); + struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand); + struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller); int ret; ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np); if (ret) return ret; - ecc->read_page = sunxi_nfc_hw_ecc_read_page; - ecc->write_page = sunxi_nfc_hw_ecc_write_page; + if (nfc->dmac) { + ecc->read_page = sunxi_nfc_hw_ecc_read_page_dma; + ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage_dma; + ecc->write_page = sunxi_nfc_hw_ecc_write_page_dma; + nand->options |= NAND_USE_BOUNCE_BUFFER; + } else { + ecc->read_page = sunxi_nfc_hw_ecc_read_page; + ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage; + ecc->write_page = sunxi_nfc_hw_ecc_write_page; + } + + /* TODO: support DMA for raw accesses and subpage write */ + ecc->write_subpage = sunxi_nfc_hw_ecc_write_subpage; ecc->read_oob_raw = nand_read_oob_std; ecc->write_oob_raw = nand_write_oob_std; ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage; @@ -1871,26 +2211,59 @@ static int sunxi_nfc_probe(struct platform_device *pdev) if (ret) goto out_ahb_clk_unprepare; + nfc->reset = devm_reset_control_get_optional(dev, "ahb"); + if (!IS_ERR(nfc->reset)) { + ret = reset_control_deassert(nfc->reset); + if (ret) { + dev_err(dev, "reset err %d\n", ret); + goto out_mod_clk_unprepare; + } + } else if (PTR_ERR(nfc->reset) != -ENOENT) { + ret = PTR_ERR(nfc->reset); + goto out_mod_clk_unprepare; + } + ret = sunxi_nfc_rst(nfc); if (ret) - goto out_mod_clk_unprepare; + goto out_ahb_reset_reassert; writel(0, nfc->regs + NFC_REG_INT); ret = devm_request_irq(dev, irq, sunxi_nfc_interrupt, 0, "sunxi-nand", nfc); if (ret) - goto out_mod_clk_unprepare; + goto out_ahb_reset_reassert; + + nfc->dmac = dma_request_slave_channel(dev, "rxtx"); + if (nfc->dmac) { + struct dma_slave_config dmac_cfg = { }; + + dmac_cfg.src_addr = r->start + NFC_REG_IO_DATA; + dmac_cfg.dst_addr = dmac_cfg.src_addr; + dmac_cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dmac_cfg.dst_addr_width = dmac_cfg.src_addr_width; + dmac_cfg.src_maxburst = 4; + dmac_cfg.dst_maxburst = 4; + dmaengine_slave_config(nfc->dmac, &dmac_cfg); + } else { + dev_warn(dev, "failed to request rxtx DMA channel\n"); + } platform_set_drvdata(pdev, nfc); ret = sunxi_nand_chips_init(dev, nfc); if (ret) { dev_err(dev, "failed to init nand chips\n"); - goto out_mod_clk_unprepare; + goto out_release_dmac; } return 0; +out_release_dmac: + if (nfc->dmac) + dma_release_channel(nfc->dmac); +out_ahb_reset_reassert: + if (!IS_ERR(nfc->reset)) + reset_control_assert(nfc->reset); out_mod_clk_unprepare: clk_disable_unprepare(nfc->mod_clk); out_ahb_clk_unprepare: @@ -1904,6 +2277,12 @@ static int sunxi_nfc_remove(struct platform_device *pdev) struct sunxi_nfc *nfc = platform_get_drvdata(pdev); sunxi_nand_chips_cleanup(nfc); + + if (!IS_ERR(nfc->reset)) + reset_control_assert(nfc->reset); + + if (nfc->dmac) + dma_release_channel(nfc->dmac); clk_disable_unprepare(nfc->mod_clk); clk_disable_unprepare(nfc->ahb_clk); diff --git a/drivers/mtd/nand/xway_nand.c b/drivers/mtd/nand/xway_nand.c index 0cf0ac07a8c2..1f2948c0c458 100644 --- a/drivers/mtd/nand/xway_nand.c +++ b/drivers/mtd/nand/xway_nand.c @@ -4,6 +4,7 @@ * by the Free Software Foundation. * * Copyright © 2012 John Crispin <blogic@openwrt.org> + * Copyright © 2016 Hauke Mehrtens <hauke@hauke-m.de> */ #include <linux/mtd/nand.h> @@ -16,20 +17,28 @@ #define EBU_ADDSEL1 0x24 #define EBU_NAND_CON 0xB0 #define EBU_NAND_WAIT 0xB4 +#define NAND_WAIT_RD BIT(0) /* NAND flash status output */ +#define NAND_WAIT_WR_C BIT(3) /* NAND Write/Read complete */ #define EBU_NAND_ECC0 0xB8 #define EBU_NAND_ECC_AC 0xBC -/* nand commands */ -#define NAND_CMD_ALE (1 << 2) -#define NAND_CMD_CLE (1 << 3) -#define NAND_CMD_CS (1 << 4) -#define NAND_WRITE_CMD_RESET 0xff +/* + * nand commands + * The pins of the NAND chip are selected based on the address bits of the + * "register" read and write. There are no special registers, but an + * address range and the lower address bits are used to activate the + * correct line. For example when the bit (1 << 2) is set in the address + * the ALE pin will be activated. + */ +#define NAND_CMD_ALE BIT(2) /* address latch enable */ +#define NAND_CMD_CLE BIT(3) /* command latch enable */ +#define NAND_CMD_CS BIT(4) /* chip select */ +#define NAND_CMD_SE BIT(5) /* spare area access latch */ +#define NAND_CMD_WP BIT(6) /* write protect */ #define NAND_WRITE_CMD (NAND_CMD_CS | NAND_CMD_CLE) #define NAND_WRITE_ADDR (NAND_CMD_CS | NAND_CMD_ALE) #define NAND_WRITE_DATA (NAND_CMD_CS) #define NAND_READ_DATA (NAND_CMD_CS) -#define NAND_WAIT_WR_C (1 << 3) -#define NAND_WAIT_RD (0x1) /* we need to tel the ebu which addr we mapped the nand to */ #define ADDSEL1_MASK(x) (x << 4) @@ -54,31 +63,41 @@ #define NAND_CON_CSMUX (1 << 1) #define NAND_CON_NANDM 1 -static void xway_reset_chip(struct nand_chip *chip) +struct xway_nand_data { + struct nand_chip chip; + unsigned long csflags; + void __iomem *nandaddr; +}; + +static u8 xway_readb(struct mtd_info *mtd, int op) { - unsigned long nandaddr = (unsigned long) chip->IO_ADDR_W; - unsigned long flags; + struct nand_chip *chip = mtd_to_nand(mtd); + struct xway_nand_data *data = nand_get_controller_data(chip); - nandaddr &= ~NAND_WRITE_ADDR; - nandaddr |= NAND_WRITE_CMD; + return readb(data->nandaddr + op); +} - /* finish with a reset */ - spin_lock_irqsave(&ebu_lock, flags); - writeb(NAND_WRITE_CMD_RESET, (void __iomem *) nandaddr); - while ((ltq_ebu_r32(EBU_NAND_WAIT) & NAND_WAIT_WR_C) == 0) - ; - spin_unlock_irqrestore(&ebu_lock, flags); +static void xway_writeb(struct mtd_info *mtd, int op, u8 value) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct xway_nand_data *data = nand_get_controller_data(chip); + + writeb(value, data->nandaddr + op); } -static void xway_select_chip(struct mtd_info *mtd, int chip) +static void xway_select_chip(struct mtd_info *mtd, int select) { + struct nand_chip *chip = mtd_to_nand(mtd); + struct xway_nand_data *data = nand_get_controller_data(chip); - switch (chip) { + switch (select) { case -1: ltq_ebu_w32_mask(NAND_CON_CE, 0, EBU_NAND_CON); ltq_ebu_w32_mask(NAND_CON_NANDM, 0, EBU_NAND_CON); + spin_unlock_irqrestore(&ebu_lock, data->csflags); break; case 0: + spin_lock_irqsave(&ebu_lock, data->csflags); ltq_ebu_w32_mask(0, NAND_CON_NANDM, EBU_NAND_CON); ltq_ebu_w32_mask(0, NAND_CON_CE, EBU_NAND_CON); break; @@ -89,26 +108,16 @@ static void xway_select_chip(struct mtd_info *mtd, int chip) static void xway_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long nandaddr = (unsigned long) this->IO_ADDR_W; - unsigned long flags; - - if (ctrl & NAND_CTRL_CHANGE) { - nandaddr &= ~(NAND_WRITE_CMD | NAND_WRITE_ADDR); - if (ctrl & NAND_CLE) - nandaddr |= NAND_WRITE_CMD; - else - nandaddr |= NAND_WRITE_ADDR; - this->IO_ADDR_W = (void __iomem *) nandaddr; - } + if (cmd == NAND_CMD_NONE) + return; - if (cmd != NAND_CMD_NONE) { - spin_lock_irqsave(&ebu_lock, flags); - writeb(cmd, this->IO_ADDR_W); - while ((ltq_ebu_r32(EBU_NAND_WAIT) & NAND_WAIT_WR_C) == 0) - ; - spin_unlock_irqrestore(&ebu_lock, flags); - } + if (ctrl & NAND_CLE) + xway_writeb(mtd, NAND_WRITE_CMD, cmd); + else if (ctrl & NAND_ALE) + xway_writeb(mtd, NAND_WRITE_ADDR, cmd); + + while ((ltq_ebu_r32(EBU_NAND_WAIT) & NAND_WAIT_WR_C) == 0) + ; } static int xway_dev_ready(struct mtd_info *mtd) @@ -118,80 +127,122 @@ static int xway_dev_ready(struct mtd_info *mtd) static unsigned char xway_read_byte(struct mtd_info *mtd) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long nandaddr = (unsigned long) this->IO_ADDR_R; - unsigned long flags; - int ret; + return xway_readb(mtd, NAND_READ_DATA); +} + +static void xway_read_buf(struct mtd_info *mtd, u_char *buf, int len) +{ + int i; - spin_lock_irqsave(&ebu_lock, flags); - ret = ltq_r8((void __iomem *)(nandaddr + NAND_READ_DATA)); - spin_unlock_irqrestore(&ebu_lock, flags); + for (i = 0; i < len; i++) + buf[i] = xway_readb(mtd, NAND_WRITE_DATA); +} - return ret; +static void xway_write_buf(struct mtd_info *mtd, const u_char *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + xway_writeb(mtd, NAND_WRITE_DATA, buf[i]); } +/* + * Probe for the NAND device. + */ static int xway_nand_probe(struct platform_device *pdev) { - struct nand_chip *this = platform_get_drvdata(pdev); - unsigned long nandaddr = (unsigned long) this->IO_ADDR_W; - const __be32 *cs = of_get_property(pdev->dev.of_node, - "lantiq,cs", NULL); + struct xway_nand_data *data; + struct mtd_info *mtd; + struct resource *res; + int err; + u32 cs; u32 cs_flag = 0; + /* Allocate memory for the device structure (and zero it) */ + data = devm_kzalloc(&pdev->dev, sizeof(struct xway_nand_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->nandaddr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->nandaddr)) + return PTR_ERR(data->nandaddr); + + nand_set_flash_node(&data->chip, pdev->dev.of_node); + mtd = nand_to_mtd(&data->chip); + mtd->dev.parent = &pdev->dev; + + data->chip.cmd_ctrl = xway_cmd_ctrl; + data->chip.dev_ready = xway_dev_ready; + data->chip.select_chip = xway_select_chip; + data->chip.write_buf = xway_write_buf; + data->chip.read_buf = xway_read_buf; + data->chip.read_byte = xway_read_byte; + data->chip.chip_delay = 30; + + data->chip.ecc.mode = NAND_ECC_SOFT; + data->chip.ecc.algo = NAND_ECC_HAMMING; + + platform_set_drvdata(pdev, data); + nand_set_controller_data(&data->chip, data); + /* load our CS from the DT. Either we find a valid 1 or default to 0 */ - if (cs && (*cs == 1)) + err = of_property_read_u32(pdev->dev.of_node, "lantiq,cs", &cs); + if (!err && cs == 1) cs_flag = NAND_CON_IN_CS1 | NAND_CON_OUT_CS1; /* setup the EBU to run in NAND mode on our base addr */ - ltq_ebu_w32(CPHYSADDR(nandaddr) - | ADDSEL1_MASK(3) | ADDSEL1_REGEN, EBU_ADDSEL1); + ltq_ebu_w32(CPHYSADDR(data->nandaddr) + | ADDSEL1_MASK(3) | ADDSEL1_REGEN, EBU_ADDSEL1); ltq_ebu_w32(BUSCON1_SETUP | BUSCON1_BCGEN_RES | BUSCON1_WAITWRC2 - | BUSCON1_WAITRDC2 | BUSCON1_HOLDC1 | BUSCON1_RECOVC1 - | BUSCON1_CMULT4, LTQ_EBU_BUSCON1); + | BUSCON1_WAITRDC2 | BUSCON1_HOLDC1 | BUSCON1_RECOVC1 + | BUSCON1_CMULT4, LTQ_EBU_BUSCON1); ltq_ebu_w32(NAND_CON_NANDM | NAND_CON_CSMUX | NAND_CON_CS_P - | NAND_CON_SE_P | NAND_CON_WP_P | NAND_CON_PRE_P - | cs_flag, EBU_NAND_CON); + | NAND_CON_SE_P | NAND_CON_WP_P | NAND_CON_PRE_P + | cs_flag, EBU_NAND_CON); - /* finish with a reset */ - xway_reset_chip(this); + /* Scan to find existence of the device */ + err = nand_scan(mtd, 1); + if (err) + return err; - return 0; -} + err = mtd_device_register(mtd, NULL, 0); + if (err) + nand_release(mtd); -static struct platform_nand_data xway_nand_data = { - .chip = { - .nr_chips = 1, - .chip_delay = 30, - }, - .ctrl = { - .probe = xway_nand_probe, - .cmd_ctrl = xway_cmd_ctrl, - .dev_ready = xway_dev_ready, - .select_chip = xway_select_chip, - .read_byte = xway_read_byte, - } -}; + return err; +} /* - * Try to find the node inside the DT. If it is available attach out - * platform_nand_data + * Remove a NAND device. */ -static int __init xway_register_nand(void) +static int xway_nand_remove(struct platform_device *pdev) { - struct device_node *node; - struct platform_device *pdev; - - node = of_find_compatible_node(NULL, NULL, "lantiq,nand-xway"); - if (!node) - return -ENOENT; - pdev = of_find_device_by_node(node); - if (!pdev) - return -EINVAL; - pdev->dev.platform_data = &xway_nand_data; - of_node_put(node); + struct xway_nand_data *data = platform_get_drvdata(pdev); + + nand_release(nand_to_mtd(&data->chip)); + return 0; } -subsys_initcall(xway_register_nand); +static const struct of_device_id xway_nand_match[] = { + { .compatible = "lantiq,nand-xway" }, + {}, +}; +MODULE_DEVICE_TABLE(of, xway_nand_match); + +static struct platform_driver xway_nand_driver = { + .probe = xway_nand_probe, + .remove = xway_nand_remove, + .driver = { + .name = "lantiq,nand-xway", + .of_match_table = xway_nand_match, + }, +}; + +module_platform_driver(xway_nand_driver); + +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index a4b029a417f0..1a6d0e367b89 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -3188,13 +3188,13 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len, size_t tmp_retlen; ret = action(mtd, from, len, &tmp_retlen, buf); + if (ret) + break; buf += tmp_retlen; len -= tmp_retlen; *retlen += tmp_retlen; - if (ret) - break; } otp_pages--; } diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig index d42c98e1f581..4a682ee0f632 100644 --- a/drivers/mtd/spi-nor/Kconfig +++ b/drivers/mtd/spi-nor/Kconfig @@ -29,6 +29,26 @@ config MTD_SPI_NOR_USE_4K_SECTORS Please note that some tools/drivers/filesystems may not work with 4096 B erase size (e.g. UBIFS requires 15 KiB as a minimum). +config SPI_ATMEL_QUADSPI + tristate "Atmel Quad SPI Controller" + depends on ARCH_AT91 || (ARM && COMPILE_TEST) + depends on OF && HAS_IOMEM + help + This enables support for the Quad SPI controller in master mode. + This driver does not support generic SPI. The implementation only + supports SPI NOR. + +config SPI_CADENCE_QUADSPI + tristate "Cadence Quad SPI controller" + depends on OF && ARM + help + Enable support for the Cadence Quad SPI Flash controller. + + Cadence QSPI is a specialized controller for connecting an SPI + Flash over 1/2/4-bit wide bus. Enable this option if you have a + device with a Cadence QSPI controller and want to access the + Flash as an MTD device. + config SPI_FSL_QUADSPI tristate "Freescale Quad SPI controller" depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST @@ -38,6 +58,13 @@ config SPI_FSL_QUADSPI This controller does not support generic SPI. It only supports SPI NOR. +config SPI_HISI_SFC + tristate "Hisilicon SPI-NOR Flash Controller(SFC)" + depends on ARCH_HISI || COMPILE_TEST + depends on HAS_IOMEM && HAS_DMA + help + This enables support for hisilicon SPI-NOR flash controller. + config SPI_NXP_SPIFI tristate "NXP SPI Flash Interface (SPIFI)" depends on OF && (ARCH_LPC18XX || COMPILE_TEST) diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile index 0bf3a7f81675..121695e83542 100644 --- a/drivers/mtd/spi-nor/Makefile +++ b/drivers/mtd/spi-nor/Makefile @@ -1,4 +1,7 @@ obj-$(CONFIG_MTD_SPI_NOR) += spi-nor.o +obj-$(CONFIG_SPI_ATMEL_QUADSPI) += atmel-quadspi.o +obj-$(CONFIG_SPI_CADENCE_QUADSPI) += cadence-quadspi.o obj-$(CONFIG_SPI_FSL_QUADSPI) += fsl-quadspi.o +obj-$(CONFIG_SPI_HISI_SFC) += hisi-sfc.o obj-$(CONFIG_MTD_MT81xx_NOR) += mtk-quadspi.o obj-$(CONFIG_SPI_NXP_SPIFI) += nxp-spifi.o diff --git a/drivers/mtd/spi-nor/atmel-quadspi.c b/drivers/mtd/spi-nor/atmel-quadspi.c new file mode 100644 index 000000000000..47937d9beec6 --- /dev/null +++ b/drivers/mtd/spi-nor/atmel-quadspi.c @@ -0,0 +1,732 @@ +/* + * Driver for Atmel QSPI Controller + * + * Copyright (C) 2015 Atmel Corporation + * + * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale. + */ + +#include <linux/kernel.h> +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/spi-nor.h> +#include <linux/platform_data/atmel.h> +#include <linux/of.h> + +#include <linux/io.h> +#include <linux/gpio.h> +#include <linux/pinctrl/consumer.h> + +/* QSPI register offsets */ +#define QSPI_CR 0x0000 /* Control Register */ +#define QSPI_MR 0x0004 /* Mode Register */ +#define QSPI_RD 0x0008 /* Receive Data Register */ +#define QSPI_TD 0x000c /* Transmit Data Register */ +#define QSPI_SR 0x0010 /* Status Register */ +#define QSPI_IER 0x0014 /* Interrupt Enable Register */ +#define QSPI_IDR 0x0018 /* Interrupt Disable Register */ +#define QSPI_IMR 0x001c /* Interrupt Mask Register */ +#define QSPI_SCR 0x0020 /* Serial Clock Register */ + +#define QSPI_IAR 0x0030 /* Instruction Address Register */ +#define QSPI_ICR 0x0034 /* Instruction Code Register */ +#define QSPI_IFR 0x0038 /* Instruction Frame Register */ + +#define QSPI_SMR 0x0040 /* Scrambling Mode Register */ +#define QSPI_SKR 0x0044 /* Scrambling Key Register */ + +#define QSPI_WPMR 0x00E4 /* Write Protection Mode Register */ +#define QSPI_WPSR 0x00E8 /* Write Protection Status Register */ + +#define QSPI_VERSION 0x00FC /* Version Register */ + + +/* Bitfields in QSPI_CR (Control Register) */ +#define QSPI_CR_QSPIEN BIT(0) +#define QSPI_CR_QSPIDIS BIT(1) +#define QSPI_CR_SWRST BIT(7) +#define QSPI_CR_LASTXFER BIT(24) + +/* Bitfields in QSPI_MR (Mode Register) */ +#define QSPI_MR_SSM BIT(0) +#define QSPI_MR_LLB BIT(1) +#define QSPI_MR_WDRBT BIT(2) +#define QSPI_MR_SMRM BIT(3) +#define QSPI_MR_CSMODE_MASK GENMASK(5, 4) +#define QSPI_MR_CSMODE_NOT_RELOADED (0 << 4) +#define QSPI_MR_CSMODE_LASTXFER (1 << 4) +#define QSPI_MR_CSMODE_SYSTEMATICALLY (2 << 4) +#define QSPI_MR_NBBITS_MASK GENMASK(11, 8) +#define QSPI_MR_NBBITS(n) ((((n) - 8) << 8) & QSPI_MR_NBBITS_MASK) +#define QSPI_MR_DLYBCT_MASK GENMASK(23, 16) +#define QSPI_MR_DLYBCT(n) (((n) << 16) & QSPI_MR_DLYBCT_MASK) +#define QSPI_MR_DLYCS_MASK GENMASK(31, 24) +#define QSPI_MR_DLYCS(n) (((n) << 24) & QSPI_MR_DLYCS_MASK) + +/* Bitfields in QSPI_SR/QSPI_IER/QSPI_IDR/QSPI_IMR */ +#define QSPI_SR_RDRF BIT(0) +#define QSPI_SR_TDRE BIT(1) +#define QSPI_SR_TXEMPTY BIT(2) +#define QSPI_SR_OVRES BIT(3) +#define QSPI_SR_CSR BIT(8) +#define QSPI_SR_CSS BIT(9) +#define QSPI_SR_INSTRE BIT(10) +#define QSPI_SR_QSPIENS BIT(24) + +#define QSPI_SR_CMD_COMPLETED (QSPI_SR_INSTRE | QSPI_SR_CSR) + +/* Bitfields in QSPI_SCR (Serial Clock Register) */ +#define QSPI_SCR_CPOL BIT(0) +#define QSPI_SCR_CPHA BIT(1) +#define QSPI_SCR_SCBR_MASK GENMASK(15, 8) +#define QSPI_SCR_SCBR(n) (((n) << 8) & QSPI_SCR_SCBR_MASK) +#define QSPI_SCR_DLYBS_MASK GENMASK(23, 16) +#define QSPI_SCR_DLYBS(n) (((n) << 16) & QSPI_SCR_DLYBS_MASK) + +/* Bitfields in QSPI_ICR (Instruction Code Register) */ +#define QSPI_ICR_INST_MASK GENMASK(7, 0) +#define QSPI_ICR_INST(inst) (((inst) << 0) & QSPI_ICR_INST_MASK) +#define QSPI_ICR_OPT_MASK GENMASK(23, 16) +#define QSPI_ICR_OPT(opt) (((opt) << 16) & QSPI_ICR_OPT_MASK) + +/* Bitfields in QSPI_IFR (Instruction Frame Register) */ +#define QSPI_IFR_WIDTH_MASK GENMASK(2, 0) +#define QSPI_IFR_WIDTH_SINGLE_BIT_SPI (0 << 0) +#define QSPI_IFR_WIDTH_DUAL_OUTPUT (1 << 0) +#define QSPI_IFR_WIDTH_QUAD_OUTPUT (2 << 0) +#define QSPI_IFR_WIDTH_DUAL_IO (3 << 0) +#define QSPI_IFR_WIDTH_QUAD_IO (4 << 0) +#define QSPI_IFR_WIDTH_DUAL_CMD (5 << 0) +#define QSPI_IFR_WIDTH_QUAD_CMD (6 << 0) +#define QSPI_IFR_INSTEN BIT(4) +#define QSPI_IFR_ADDREN BIT(5) +#define QSPI_IFR_OPTEN BIT(6) +#define QSPI_IFR_DATAEN BIT(7) +#define QSPI_IFR_OPTL_MASK GENMASK(9, 8) +#define QSPI_IFR_OPTL_1BIT (0 << 8) +#define QSPI_IFR_OPTL_2BIT (1 << 8) +#define QSPI_IFR_OPTL_4BIT (2 << 8) +#define QSPI_IFR_OPTL_8BIT (3 << 8) +#define QSPI_IFR_ADDRL BIT(10) +#define QSPI_IFR_TFRTYP_MASK GENMASK(13, 12) +#define QSPI_IFR_TFRTYP_TRSFR_READ (0 << 12) +#define QSPI_IFR_TFRTYP_TRSFR_READ_MEM (1 << 12) +#define QSPI_IFR_TFRTYP_TRSFR_WRITE (2 << 12) +#define QSPI_IFR_TFRTYP_TRSFR_WRITE_MEM (3 << 13) +#define QSPI_IFR_CRM BIT(14) +#define QSPI_IFR_NBDUM_MASK GENMASK(20, 16) +#define QSPI_IFR_NBDUM(n) (((n) << 16) & QSPI_IFR_NBDUM_MASK) + +/* Bitfields in QSPI_SMR (Scrambling Mode Register) */ +#define QSPI_SMR_SCREN BIT(0) +#define QSPI_SMR_RVDIS BIT(1) + +/* Bitfields in QSPI_WPMR (Write Protection Mode Register) */ +#define QSPI_WPMR_WPEN BIT(0) +#define QSPI_WPMR_WPKEY_MASK GENMASK(31, 8) +#define QSPI_WPMR_WPKEY(wpkey) (((wpkey) << 8) & QSPI_WPMR_WPKEY_MASK) + +/* Bitfields in QSPI_WPSR (Write Protection Status Register) */ +#define QSPI_WPSR_WPVS BIT(0) +#define QSPI_WPSR_WPVSRC_MASK GENMASK(15, 8) +#define QSPI_WPSR_WPVSRC(src) (((src) << 8) & QSPI_WPSR_WPVSRC) + + +struct atmel_qspi { + void __iomem *regs; + void __iomem *mem; + struct clk *clk; + struct platform_device *pdev; + u32 pending; + + struct spi_nor nor; + u32 clk_rate; + struct completion cmd_completion; +}; + +struct atmel_qspi_command { + union { + struct { + u32 instruction:1; + u32 address:3; + u32 mode:1; + u32 dummy:1; + u32 data:1; + u32 reserved:25; + } bits; + u32 word; + } enable; + u8 instruction; + u8 mode; + u8 num_mode_cycles; + u8 num_dummy_cycles; + u32 address; + + size_t buf_len; + const void *tx_buf; + void *rx_buf; +}; + +/* Register access functions */ +static inline u32 qspi_readl(struct atmel_qspi *aq, u32 reg) +{ + return readl_relaxed(aq->regs + reg); +} + +static inline void qspi_writel(struct atmel_qspi *aq, u32 reg, u32 value) +{ + writel_relaxed(value, aq->regs + reg); +} + +static int atmel_qspi_run_transfer(struct atmel_qspi *aq, + const struct atmel_qspi_command *cmd) +{ + void __iomem *ahb_mem; + + /* Then fallback to a PIO transfer (memcpy() DOES NOT work!) */ + ahb_mem = aq->mem; + if (cmd->enable.bits.address) + ahb_mem += cmd->address; + if (cmd->tx_buf) + _memcpy_toio(ahb_mem, cmd->tx_buf, cmd->buf_len); + else + _memcpy_fromio(cmd->rx_buf, ahb_mem, cmd->buf_len); + + return 0; +} + +#ifdef DEBUG +static void atmel_qspi_debug_command(struct atmel_qspi *aq, + const struct atmel_qspi_command *cmd, + u32 ifr) +{ + u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; + size_t len = 0; + int i; + + if (cmd->enable.bits.instruction) + cmd_buf[len++] = cmd->instruction; + + for (i = cmd->enable.bits.address-1; i >= 0; --i) + cmd_buf[len++] = (cmd->address >> (i << 3)) & 0xff; + + if (cmd->enable.bits.mode) + cmd_buf[len++] = cmd->mode; + + if (cmd->enable.bits.dummy) { + int num = cmd->num_dummy_cycles; + + switch (ifr & QSPI_IFR_WIDTH_MASK) { + case QSPI_IFR_WIDTH_SINGLE_BIT_SPI: + case QSPI_IFR_WIDTH_DUAL_OUTPUT: + case QSPI_IFR_WIDTH_QUAD_OUTPUT: + num >>= 3; + break; + case QSPI_IFR_WIDTH_DUAL_IO: + case QSPI_IFR_WIDTH_DUAL_CMD: + num >>= 2; + break; + case QSPI_IFR_WIDTH_QUAD_IO: + case QSPI_IFR_WIDTH_QUAD_CMD: + num >>= 1; + break; + default: + return; + } + + for (i = 0; i < num; ++i) + cmd_buf[len++] = 0; + } + + /* Dump the SPI command */ + print_hex_dump(KERN_DEBUG, "qspi cmd: ", DUMP_PREFIX_NONE, + 32, 1, cmd_buf, len, false); + +#ifdef VERBOSE_DEBUG + /* If verbose debug is enabled, also dump the TX data */ + if (cmd->enable.bits.data && cmd->tx_buf) + print_hex_dump(KERN_DEBUG, "qspi tx : ", DUMP_PREFIX_NONE, + 32, 1, cmd->tx_buf, cmd->buf_len, false); +#endif +} +#else +#define atmel_qspi_debug_command(aq, cmd, ifr) +#endif + +static int atmel_qspi_run_command(struct atmel_qspi *aq, + const struct atmel_qspi_command *cmd, + u32 ifr_tfrtyp, u32 ifr_width) +{ + u32 iar, icr, ifr, sr; + int err = 0; + + iar = 0; + icr = 0; + ifr = ifr_tfrtyp | ifr_width; + + /* Compute instruction parameters */ + if (cmd->enable.bits.instruction) { + icr |= QSPI_ICR_INST(cmd->instruction); + ifr |= QSPI_IFR_INSTEN; + } + + /* Compute address parameters */ + switch (cmd->enable.bits.address) { + case 4: + ifr |= QSPI_IFR_ADDRL; + /* fall through to the 24bit (3 byte) address case. */ + case 3: + iar = (cmd->enable.bits.data) ? 0 : cmd->address; + ifr |= QSPI_IFR_ADDREN; + break; + case 0: + break; + default: + return -EINVAL; + } + + /* Compute option parameters */ + if (cmd->enable.bits.mode && cmd->num_mode_cycles) { + u32 mode_cycle_bits, mode_bits; + + icr |= QSPI_ICR_OPT(cmd->mode); + ifr |= QSPI_IFR_OPTEN; + + switch (ifr & QSPI_IFR_WIDTH_MASK) { + case QSPI_IFR_WIDTH_SINGLE_BIT_SPI: + case QSPI_IFR_WIDTH_DUAL_OUTPUT: + case QSPI_IFR_WIDTH_QUAD_OUTPUT: + mode_cycle_bits = 1; + break; + case QSPI_IFR_WIDTH_DUAL_IO: + case QSPI_IFR_WIDTH_DUAL_CMD: + mode_cycle_bits = 2; + break; + case QSPI_IFR_WIDTH_QUAD_IO: + case QSPI_IFR_WIDTH_QUAD_CMD: + mode_cycle_bits = 4; + break; + default: + return -EINVAL; + } + + mode_bits = cmd->num_mode_cycles * mode_cycle_bits; + switch (mode_bits) { + case 1: + ifr |= QSPI_IFR_OPTL_1BIT; + break; + + case 2: + ifr |= QSPI_IFR_OPTL_2BIT; + break; + + case 4: + ifr |= QSPI_IFR_OPTL_4BIT; + break; + + case 8: + ifr |= QSPI_IFR_OPTL_8BIT; + break; + + default: + return -EINVAL; + } + } + + /* Set number of dummy cycles */ + if (cmd->enable.bits.dummy) + ifr |= QSPI_IFR_NBDUM(cmd->num_dummy_cycles); + + /* Set data enable */ + if (cmd->enable.bits.data) { + ifr |= QSPI_IFR_DATAEN; + + /* Special case for Continuous Read Mode */ + if (!cmd->tx_buf && !cmd->rx_buf) + ifr |= QSPI_IFR_CRM; + } + + /* Clear pending interrupts */ + (void)qspi_readl(aq, QSPI_SR); + + /* Set QSPI Instruction Frame registers */ + atmel_qspi_debug_command(aq, cmd, ifr); + qspi_writel(aq, QSPI_IAR, iar); + qspi_writel(aq, QSPI_ICR, icr); + qspi_writel(aq, QSPI_IFR, ifr); + + /* Skip to the final steps if there is no data */ + if (!cmd->enable.bits.data) + goto no_data; + + /* Dummy read of QSPI_IFR to synchronize APB and AHB accesses */ + (void)qspi_readl(aq, QSPI_IFR); + + /* Stop here for continuous read */ + if (!cmd->tx_buf && !cmd->rx_buf) + return 0; + /* Send/Receive data */ + err = atmel_qspi_run_transfer(aq, cmd); + + /* Release the chip-select */ + qspi_writel(aq, QSPI_CR, QSPI_CR_LASTXFER); + + if (err) + return err; + +#if defined(DEBUG) && defined(VERBOSE_DEBUG) + /* + * If verbose debug is enabled, also dump the RX data in addition to + * the SPI command previously dumped by atmel_qspi_debug_command() + */ + if (cmd->rx_buf) + print_hex_dump(KERN_DEBUG, "qspi rx : ", DUMP_PREFIX_NONE, + 32, 1, cmd->rx_buf, cmd->buf_len, false); +#endif +no_data: + /* Poll INSTRuction End status */ + sr = qspi_readl(aq, QSPI_SR); + if ((sr & QSPI_SR_CMD_COMPLETED) == QSPI_SR_CMD_COMPLETED) + return err; + + /* Wait for INSTRuction End interrupt */ + reinit_completion(&aq->cmd_completion); + aq->pending = sr & QSPI_SR_CMD_COMPLETED; + qspi_writel(aq, QSPI_IER, QSPI_SR_CMD_COMPLETED); + if (!wait_for_completion_timeout(&aq->cmd_completion, + msecs_to_jiffies(1000))) + err = -ETIMEDOUT; + qspi_writel(aq, QSPI_IDR, QSPI_SR_CMD_COMPLETED); + + return err; +} + +static int atmel_qspi_read_reg(struct spi_nor *nor, u8 opcode, + u8 *buf, int len) +{ + struct atmel_qspi *aq = nor->priv; + struct atmel_qspi_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.enable.bits.instruction = 1; + cmd.enable.bits.data = 1; + cmd.instruction = opcode; + cmd.rx_buf = buf; + cmd.buf_len = len; + return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ, + QSPI_IFR_WIDTH_SINGLE_BIT_SPI); +} + +static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode, + u8 *buf, int len) +{ + struct atmel_qspi *aq = nor->priv; + struct atmel_qspi_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.enable.bits.instruction = 1; + cmd.enable.bits.data = (buf != NULL && len > 0); + cmd.instruction = opcode; + cmd.tx_buf = buf; + cmd.buf_len = len; + return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE, + QSPI_IFR_WIDTH_SINGLE_BIT_SPI); +} + +static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len, + const u_char *write_buf) +{ + struct atmel_qspi *aq = nor->priv; + struct atmel_qspi_command cmd; + ssize_t ret; + + memset(&cmd, 0, sizeof(cmd)); + cmd.enable.bits.instruction = 1; + cmd.enable.bits.address = nor->addr_width; + cmd.enable.bits.data = 1; + cmd.instruction = nor->program_opcode; + cmd.address = (u32)to; + cmd.tx_buf = write_buf; + cmd.buf_len = len; + ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE_MEM, + QSPI_IFR_WIDTH_SINGLE_BIT_SPI); + return (ret < 0) ? ret : len; +} + +static int atmel_qspi_erase(struct spi_nor *nor, loff_t offs) +{ + struct atmel_qspi *aq = nor->priv; + struct atmel_qspi_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.enable.bits.instruction = 1; + cmd.enable.bits.address = nor->addr_width; + cmd.instruction = nor->erase_opcode; + cmd.address = (u32)offs; + return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE, + QSPI_IFR_WIDTH_SINGLE_BIT_SPI); +} + +static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len, + u_char *read_buf) +{ + struct atmel_qspi *aq = nor->priv; + struct atmel_qspi_command cmd; + u8 num_mode_cycles, num_dummy_cycles; + u32 ifr_width; + ssize_t ret; + + switch (nor->flash_read) { + case SPI_NOR_NORMAL: + case SPI_NOR_FAST: + ifr_width = QSPI_IFR_WIDTH_SINGLE_BIT_SPI; + break; + + case SPI_NOR_DUAL: + ifr_width = QSPI_IFR_WIDTH_DUAL_OUTPUT; + break; + + case SPI_NOR_QUAD: + ifr_width = QSPI_IFR_WIDTH_QUAD_OUTPUT; + break; + + default: + return -EINVAL; + } + + if (nor->read_dummy >= 2) { + num_mode_cycles = 2; + num_dummy_cycles = nor->read_dummy - 2; + } else { + num_mode_cycles = nor->read_dummy; + num_dummy_cycles = 0; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.enable.bits.instruction = 1; + cmd.enable.bits.address = nor->addr_width; + cmd.enable.bits.mode = (num_mode_cycles > 0); + cmd.enable.bits.dummy = (num_dummy_cycles > 0); + cmd.enable.bits.data = 1; + cmd.instruction = nor->read_opcode; + cmd.address = (u32)from; + cmd.mode = 0xff; /* This value prevents from entering the 0-4-4 mode */ + cmd.num_mode_cycles = num_mode_cycles; + cmd.num_dummy_cycles = num_dummy_cycles; + cmd.rx_buf = read_buf; + cmd.buf_len = len; + ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ_MEM, + ifr_width); + return (ret < 0) ? ret : len; +} + +static int atmel_qspi_init(struct atmel_qspi *aq) +{ + unsigned long src_rate; + u32 mr, scr, scbr; + + /* Reset the QSPI controller */ + qspi_writel(aq, QSPI_CR, QSPI_CR_SWRST); + + /* Set the QSPI controller in Serial Memory Mode */ + mr = QSPI_MR_NBBITS(8) | QSPI_MR_SSM; + qspi_writel(aq, QSPI_MR, mr); + + src_rate = clk_get_rate(aq->clk); + if (!src_rate) + return -EINVAL; + + /* Compute the QSPI baudrate */ + scbr = DIV_ROUND_UP(src_rate, aq->clk_rate); + if (scbr > 0) + scbr--; + scr = QSPI_SCR_SCBR(scbr); + qspi_writel(aq, QSPI_SCR, scr); + + /* Enable the QSPI controller */ + qspi_writel(aq, QSPI_CR, QSPI_CR_QSPIEN); + + return 0; +} + +static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id) +{ + struct atmel_qspi *aq = (struct atmel_qspi *)dev_id; + u32 status, mask, pending; + + status = qspi_readl(aq, QSPI_SR); + mask = qspi_readl(aq, QSPI_IMR); + pending = status & mask; + + if (!pending) + return IRQ_NONE; + + aq->pending |= pending; + if ((aq->pending & QSPI_SR_CMD_COMPLETED) == QSPI_SR_CMD_COMPLETED) + complete(&aq->cmd_completion); + + return IRQ_HANDLED; +} + +static int atmel_qspi_probe(struct platform_device *pdev) +{ + struct device_node *child, *np = pdev->dev.of_node; + struct atmel_qspi *aq; + struct resource *res; + struct spi_nor *nor; + struct mtd_info *mtd; + int irq, err = 0; + + if (of_get_child_count(np) != 1) + return -ENODEV; + child = of_get_next_child(np, NULL); + + aq = devm_kzalloc(&pdev->dev, sizeof(*aq), GFP_KERNEL); + if (!aq) { + err = -ENOMEM; + goto exit; + } + + platform_set_drvdata(pdev, aq); + init_completion(&aq->cmd_completion); + aq->pdev = pdev; + + /* Map the registers */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_base"); + aq->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(aq->regs)) { + dev_err(&pdev->dev, "missing registers\n"); + err = PTR_ERR(aq->regs); + goto exit; + } + + /* Map the AHB memory */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mmap"); + aq->mem = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(aq->mem)) { + dev_err(&pdev->dev, "missing AHB memory\n"); + err = PTR_ERR(aq->mem); + goto exit; + } + + /* Get the peripheral clock */ + aq->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(aq->clk)) { + dev_err(&pdev->dev, "missing peripheral clock\n"); + err = PTR_ERR(aq->clk); + goto exit; + } + + /* Enable the peripheral clock */ + err = clk_prepare_enable(aq->clk); + if (err) { + dev_err(&pdev->dev, "failed to enable the peripheral clock\n"); + goto exit; + } + + /* Request the IRQ */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "missing IRQ\n"); + err = irq; + goto disable_clk; + } + err = devm_request_irq(&pdev->dev, irq, atmel_qspi_interrupt, + 0, dev_name(&pdev->dev), aq); + if (err) + goto disable_clk; + + /* Setup the spi-nor */ + nor = &aq->nor; + mtd = &nor->mtd; + + nor->dev = &pdev->dev; + spi_nor_set_flash_node(nor, child); + nor->priv = aq; + mtd->priv = nor; + + nor->read_reg = atmel_qspi_read_reg; + nor->write_reg = atmel_qspi_write_reg; + nor->read = atmel_qspi_read; + nor->write = atmel_qspi_write; + nor->erase = atmel_qspi_erase; + + err = of_property_read_u32(child, "spi-max-frequency", &aq->clk_rate); + if (err < 0) + goto disable_clk; + + err = atmel_qspi_init(aq); + if (err) + goto disable_clk; + + err = spi_nor_scan(nor, NULL, SPI_NOR_QUAD); + if (err) + goto disable_clk; + + err = mtd_device_register(mtd, NULL, 0); + if (err) + goto disable_clk; + + of_node_put(child); + + return 0; + +disable_clk: + clk_disable_unprepare(aq->clk); +exit: + of_node_put(child); + + return err; +} + +static int atmel_qspi_remove(struct platform_device *pdev) +{ + struct atmel_qspi *aq = platform_get_drvdata(pdev); + + mtd_device_unregister(&aq->nor.mtd); + qspi_writel(aq, QSPI_CR, QSPI_CR_QSPIDIS); + clk_disable_unprepare(aq->clk); + return 0; +} + + +static const struct of_device_id atmel_qspi_dt_ids[] = { + { .compatible = "atmel,sama5d2-qspi" }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, atmel_qspi_dt_ids); + +static struct platform_driver atmel_qspi_driver = { + .driver = { + .name = "atmel_qspi", + .of_match_table = atmel_qspi_dt_ids, + }, + .probe = atmel_qspi_probe, + .remove = atmel_qspi_remove, +}; +module_platform_driver(atmel_qspi_driver); + +MODULE_AUTHOR("Cyrille Pitchen <cyrille.pitchen@atmel.com>"); +MODULE_DESCRIPTION("Atmel QSPI Controller driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c new file mode 100644 index 000000000000..d403ba7b8f43 --- /dev/null +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -0,0 +1,1299 @@ +/* + * Driver for Cadence QSPI Controller + * + * Copyright Altera Corporation (C) 2012-2014. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/clk.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/spi-nor.h> +#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/sched.h> +#include <linux/spi/spi.h> +#include <linux/timer.h> + +#define CQSPI_NAME "cadence-qspi" +#define CQSPI_MAX_CHIPSELECT 16 + +struct cqspi_st; + +struct cqspi_flash_pdata { + struct spi_nor nor; + struct cqspi_st *cqspi; + u32 clk_rate; + u32 read_delay; + u32 tshsl_ns; + u32 tsd2d_ns; + u32 tchsh_ns; + u32 tslch_ns; + u8 inst_width; + u8 addr_width; + u8 data_width; + u8 cs; + bool registered; +}; + +struct cqspi_st { + struct platform_device *pdev; + + struct clk *clk; + unsigned int sclk; + + void __iomem *iobase; + void __iomem *ahb_base; + struct completion transfer_complete; + struct mutex bus_mutex; + + int current_cs; + int current_page_size; + int current_erase_size; + int current_addr_width; + unsigned long master_ref_clk_hz; + bool is_decoded_cs; + u32 fifo_depth; + u32 fifo_width; + u32 trigger_address; + struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT]; +}; + +/* Operation timeout value */ +#define CQSPI_TIMEOUT_MS 500 +#define CQSPI_READ_TIMEOUT_MS 10 + +/* Instruction type */ +#define CQSPI_INST_TYPE_SINGLE 0 +#define CQSPI_INST_TYPE_DUAL 1 +#define CQSPI_INST_TYPE_QUAD 2 + +#define CQSPI_DUMMY_CLKS_PER_BYTE 8 +#define CQSPI_DUMMY_BYTES_MAX 4 +#define CQSPI_DUMMY_CLKS_MAX 31 + +#define CQSPI_STIG_DATA_LEN_MAX 8 + +/* Register map */ +#define CQSPI_REG_CONFIG 0x00 +#define CQSPI_REG_CONFIG_ENABLE_MASK BIT(0) +#define CQSPI_REG_CONFIG_DECODE_MASK BIT(9) +#define CQSPI_REG_CONFIG_CHIPSELECT_LSB 10 +#define CQSPI_REG_CONFIG_DMA_MASK BIT(15) +#define CQSPI_REG_CONFIG_BAUD_LSB 19 +#define CQSPI_REG_CONFIG_IDLE_LSB 31 +#define CQSPI_REG_CONFIG_CHIPSELECT_MASK 0xF +#define CQSPI_REG_CONFIG_BAUD_MASK 0xF + +#define CQSPI_REG_RD_INSTR 0x04 +#define CQSPI_REG_RD_INSTR_OPCODE_LSB 0 +#define CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB 8 +#define CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB 12 +#define CQSPI_REG_RD_INSTR_TYPE_DATA_LSB 16 +#define CQSPI_REG_RD_INSTR_MODE_EN_LSB 20 +#define CQSPI_REG_RD_INSTR_DUMMY_LSB 24 +#define CQSPI_REG_RD_INSTR_TYPE_INSTR_MASK 0x3 +#define CQSPI_REG_RD_INSTR_TYPE_ADDR_MASK 0x3 +#define CQSPI_REG_RD_INSTR_TYPE_DATA_MASK 0x3 +#define CQSPI_REG_RD_INSTR_DUMMY_MASK 0x1F + +#define CQSPI_REG_WR_INSTR 0x08 +#define CQSPI_REG_WR_INSTR_OPCODE_LSB 0 +#define CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB 12 +#define CQSPI_REG_WR_INSTR_TYPE_DATA_LSB 16 + +#define CQSPI_REG_DELAY 0x0C +#define CQSPI_REG_DELAY_TSLCH_LSB 0 +#define CQSPI_REG_DELAY_TCHSH_LSB 8 +#define CQSPI_REG_DELAY_TSD2D_LSB 16 +#define CQSPI_REG_DELAY_TSHSL_LSB 24 +#define CQSPI_REG_DELAY_TSLCH_MASK 0xFF +#define CQSPI_REG_DELAY_TCHSH_MASK 0xFF +#define CQSPI_REG_DELAY_TSD2D_MASK 0xFF +#define CQSPI_REG_DELAY_TSHSL_MASK 0xFF + +#define CQSPI_REG_READCAPTURE 0x10 +#define CQSPI_REG_READCAPTURE_BYPASS_LSB 0 +#define CQSPI_REG_READCAPTURE_DELAY_LSB 1 +#define CQSPI_REG_READCAPTURE_DELAY_MASK 0xF + +#define CQSPI_REG_SIZE 0x14 +#define CQSPI_REG_SIZE_ADDRESS_LSB 0 +#define CQSPI_REG_SIZE_PAGE_LSB 4 +#define CQSPI_REG_SIZE_BLOCK_LSB 16 +#define CQSPI_REG_SIZE_ADDRESS_MASK 0xF +#define CQSPI_REG_SIZE_PAGE_MASK 0xFFF +#define CQSPI_REG_SIZE_BLOCK_MASK 0x3F + +#define CQSPI_REG_SRAMPARTITION 0x18 +#define CQSPI_REG_INDIRECTTRIGGER 0x1C + +#define CQSPI_REG_DMA 0x20 +#define CQSPI_REG_DMA_SINGLE_LSB 0 +#define CQSPI_REG_DMA_BURST_LSB 8 +#define CQSPI_REG_DMA_SINGLE_MASK 0xFF +#define CQSPI_REG_DMA_BURST_MASK 0xFF + +#define CQSPI_REG_REMAP 0x24 +#define CQSPI_REG_MODE_BIT 0x28 + +#define CQSPI_REG_SDRAMLEVEL 0x2C +#define CQSPI_REG_SDRAMLEVEL_RD_LSB 0 +#define CQSPI_REG_SDRAMLEVEL_WR_LSB 16 +#define CQSPI_REG_SDRAMLEVEL_RD_MASK 0xFFFF +#define CQSPI_REG_SDRAMLEVEL_WR_MASK 0xFFFF + +#define CQSPI_REG_IRQSTATUS 0x40 +#define CQSPI_REG_IRQMASK 0x44 + +#define CQSPI_REG_INDIRECTRD 0x60 +#define CQSPI_REG_INDIRECTRD_START_MASK BIT(0) +#define CQSPI_REG_INDIRECTRD_CANCEL_MASK BIT(1) +#define CQSPI_REG_INDIRECTRD_DONE_MASK BIT(5) + +#define CQSPI_REG_INDIRECTRDWATERMARK 0x64 +#define CQSPI_REG_INDIRECTRDSTARTADDR 0x68 +#define CQSPI_REG_INDIRECTRDBYTES 0x6C + +#define CQSPI_REG_CMDCTRL 0x90 +#define CQSPI_REG_CMDCTRL_EXECUTE_MASK BIT(0) +#define CQSPI_REG_CMDCTRL_INPROGRESS_MASK BIT(1) +#define CQSPI_REG_CMDCTRL_WR_BYTES_LSB 12 +#define CQSPI_REG_CMDCTRL_WR_EN_LSB 15 +#define CQSPI_REG_CMDCTRL_ADD_BYTES_LSB 16 +#define CQSPI_REG_CMDCTRL_ADDR_EN_LSB 19 +#define CQSPI_REG_CMDCTRL_RD_BYTES_LSB 20 +#define CQSPI_REG_CMDCTRL_RD_EN_LSB 23 +#define CQSPI_REG_CMDCTRL_OPCODE_LSB 24 +#define CQSPI_REG_CMDCTRL_WR_BYTES_MASK 0x7 +#define CQSPI_REG_CMDCTRL_ADD_BYTES_MASK 0x3 +#define CQSPI_REG_CMDCTRL_RD_BYTES_MASK 0x7 + +#define CQSPI_REG_INDIRECTWR 0x70 +#define CQSPI_REG_INDIRECTWR_START_MASK BIT(0) +#define CQSPI_REG_INDIRECTWR_CANCEL_MASK BIT(1) +#define CQSPI_REG_INDIRECTWR_DONE_MASK BIT(5) + +#define CQSPI_REG_INDIRECTWRWATERMARK 0x74 +#define CQSPI_REG_INDIRECTWRSTARTADDR 0x78 +#define CQSPI_REG_INDIRECTWRBYTES 0x7C + +#define CQSPI_REG_CMDADDRESS 0x94 +#define CQSPI_REG_CMDREADDATALOWER 0xA0 +#define CQSPI_REG_CMDREADDATAUPPER 0xA4 +#define CQSPI_REG_CMDWRITEDATALOWER 0xA8 +#define CQSPI_REG_CMDWRITEDATAUPPER 0xAC + +/* Interrupt status bits */ +#define CQSPI_REG_IRQ_MODE_ERR BIT(0) +#define CQSPI_REG_IRQ_UNDERFLOW BIT(1) +#define CQSPI_REG_IRQ_IND_COMP BIT(2) +#define CQSPI_REG_IRQ_IND_RD_REJECT BIT(3) +#define CQSPI_REG_IRQ_WR_PROTECTED_ERR BIT(4) +#define CQSPI_REG_IRQ_ILLEGAL_AHB_ERR BIT(5) +#define CQSPI_REG_IRQ_WATERMARK BIT(6) +#define CQSPI_REG_IRQ_IND_SRAM_FULL BIT(12) + +#define CQSPI_IRQ_MASK_RD (CQSPI_REG_IRQ_WATERMARK | \ + CQSPI_REG_IRQ_IND_SRAM_FULL | \ + CQSPI_REG_IRQ_IND_COMP) + +#define CQSPI_IRQ_MASK_WR (CQSPI_REG_IRQ_IND_COMP | \ + CQSPI_REG_IRQ_WATERMARK | \ + CQSPI_REG_IRQ_UNDERFLOW) + +#define CQSPI_IRQ_STATUS_MASK 0x1FFFF + +static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clear) +{ + unsigned long end = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS); + u32 val; + + while (1) { + val = readl(reg); + if (clear) + val = ~val; + val &= mask; + + if (val == mask) + return 0; + + if (time_after(jiffies, end)) + return -ETIMEDOUT; + } +} + +static bool cqspi_is_idle(struct cqspi_st *cqspi) +{ + u32 reg = readl(cqspi->iobase + CQSPI_REG_CONFIG); + + return reg & (1 << CQSPI_REG_CONFIG_IDLE_LSB); +} + +static u32 cqspi_get_rd_sram_level(struct cqspi_st *cqspi) +{ + u32 reg = readl(cqspi->iobase + CQSPI_REG_SDRAMLEVEL); + + reg >>= CQSPI_REG_SDRAMLEVEL_RD_LSB; + return reg & CQSPI_REG_SDRAMLEVEL_RD_MASK; +} + +static irqreturn_t cqspi_irq_handler(int this_irq, void *dev) +{ + struct cqspi_st *cqspi = dev; + unsigned int irq_status; + + /* Read interrupt status */ + irq_status = readl(cqspi->iobase + CQSPI_REG_IRQSTATUS); + + /* Clear interrupt */ + writel(irq_status, cqspi->iobase + CQSPI_REG_IRQSTATUS); + + irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR; + + if (irq_status) + complete(&cqspi->transfer_complete); + + return IRQ_HANDLED; +} + +static unsigned int cqspi_calc_rdreg(struct spi_nor *nor, const u8 opcode) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + u32 rdreg = 0; + + rdreg |= f_pdata->inst_width << CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB; + rdreg |= f_pdata->addr_width << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB; + rdreg |= f_pdata->data_width << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB; + + return rdreg; +} + +static int cqspi_wait_idle(struct cqspi_st *cqspi) +{ + const unsigned int poll_idle_retry = 3; + unsigned int count = 0; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS); + while (1) { + /* + * Read few times in succession to ensure the controller + * is indeed idle, that is, the bit does not transition + * low again. + */ + if (cqspi_is_idle(cqspi)) + count++; + else + count = 0; + + if (count >= poll_idle_retry) + return 0; + + if (time_after(jiffies, timeout)) { + /* Timeout, in busy mode. */ + dev_err(&cqspi->pdev->dev, + "QSPI is still busy after %dms timeout.\n", + CQSPI_TIMEOUT_MS); + return -ETIMEDOUT; + } + + cpu_relax(); + } +} + +static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg) +{ + void __iomem *reg_base = cqspi->iobase; + int ret; + + /* Write the CMDCTRL without start execution. */ + writel(reg, reg_base + CQSPI_REG_CMDCTRL); + /* Start execute */ + reg |= CQSPI_REG_CMDCTRL_EXECUTE_MASK; + writel(reg, reg_base + CQSPI_REG_CMDCTRL); + + /* Polling for completion. */ + ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_CMDCTRL, + CQSPI_REG_CMDCTRL_INPROGRESS_MASK, 1); + if (ret) { + dev_err(&cqspi->pdev->dev, + "Flash command execution timed out.\n"); + return ret; + } + + /* Polling QSPI idle status. */ + return cqspi_wait_idle(cqspi); +} + +static int cqspi_command_read(struct spi_nor *nor, + const u8 *txbuf, const unsigned n_tx, + u8 *rxbuf, const unsigned n_rx) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int rdreg; + unsigned int reg; + unsigned int read_len; + int status; + + if (!n_rx || n_rx > CQSPI_STIG_DATA_LEN_MAX || !rxbuf) { + dev_err(nor->dev, "Invalid input argument, len %d rxbuf 0x%p\n", + n_rx, rxbuf); + return -EINVAL; + } + + reg = txbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB; + + rdreg = cqspi_calc_rdreg(nor, txbuf[0]); + writel(rdreg, reg_base + CQSPI_REG_RD_INSTR); + + reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB); + + /* 0 means 1 byte. */ + reg |= (((n_rx - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK) + << CQSPI_REG_CMDCTRL_RD_BYTES_LSB); + status = cqspi_exec_flash_cmd(cqspi, reg); + if (status) + return status; + + reg = readl(reg_base + CQSPI_REG_CMDREADDATALOWER); + + /* Put the read value into rx_buf */ + read_len = (n_rx > 4) ? 4 : n_rx; + memcpy(rxbuf, ®, read_len); + rxbuf += read_len; + + if (n_rx > 4) { + reg = readl(reg_base + CQSPI_REG_CMDREADDATAUPPER); + + read_len = n_rx - read_len; + memcpy(rxbuf, ®, read_len); + } + + return 0; +} + +static int cqspi_command_write(struct spi_nor *nor, const u8 opcode, + const u8 *txbuf, const unsigned n_tx) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int reg; + unsigned int data; + int ret; + + if (n_tx > 4 || (n_tx && !txbuf)) { + dev_err(nor->dev, + "Invalid input argument, cmdlen %d txbuf 0x%p\n", + n_tx, txbuf); + return -EINVAL; + } + + reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB; + if (n_tx) { + reg |= (0x1 << CQSPI_REG_CMDCTRL_WR_EN_LSB); + reg |= ((n_tx - 1) & CQSPI_REG_CMDCTRL_WR_BYTES_MASK) + << CQSPI_REG_CMDCTRL_WR_BYTES_LSB; + data = 0; + memcpy(&data, txbuf, n_tx); + writel(data, reg_base + CQSPI_REG_CMDWRITEDATALOWER); + } + + ret = cqspi_exec_flash_cmd(cqspi, reg); + return ret; +} + +static int cqspi_command_write_addr(struct spi_nor *nor, + const u8 opcode, const unsigned int addr) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int reg; + + reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB; + reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB); + reg |= ((nor->addr_width - 1) & CQSPI_REG_CMDCTRL_ADD_BYTES_MASK) + << CQSPI_REG_CMDCTRL_ADD_BYTES_LSB; + + writel(addr, reg_base + CQSPI_REG_CMDADDRESS); + + return cqspi_exec_flash_cmd(cqspi, reg); +} + +static int cqspi_indirect_read_setup(struct spi_nor *nor, + const unsigned int from_addr) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int dummy_clk = 0; + unsigned int reg; + + writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR); + + reg = nor->read_opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB; + reg |= cqspi_calc_rdreg(nor, nor->read_opcode); + + /* Setup dummy clock cycles */ + dummy_clk = nor->read_dummy; + if (dummy_clk > CQSPI_DUMMY_CLKS_MAX) + dummy_clk = CQSPI_DUMMY_CLKS_MAX; + + if (dummy_clk / 8) { + reg |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB); + /* Set mode bits high to ensure chip doesn't enter XIP */ + writel(0xFF, reg_base + CQSPI_REG_MODE_BIT); + + /* Need to subtract the mode byte (8 clocks). */ + if (f_pdata->inst_width != CQSPI_INST_TYPE_QUAD) + dummy_clk -= 8; + + if (dummy_clk) + reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK) + << CQSPI_REG_RD_INSTR_DUMMY_LSB; + } + + writel(reg, reg_base + CQSPI_REG_RD_INSTR); + + /* Set address width */ + reg = readl(reg_base + CQSPI_REG_SIZE); + reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; + reg |= (nor->addr_width - 1); + writel(reg, reg_base + CQSPI_REG_SIZE); + return 0; +} + +static int cqspi_indirect_read_execute(struct spi_nor *nor, + u8 *rxbuf, const unsigned n_rx) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + void __iomem *ahb_base = cqspi->ahb_base; + unsigned int remaining = n_rx; + unsigned int bytes_to_read = 0; + int ret = 0; + + writel(remaining, reg_base + CQSPI_REG_INDIRECTRDBYTES); + + /* Clear all interrupts. */ + writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS); + + writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK); + + reinit_completion(&cqspi->transfer_complete); + writel(CQSPI_REG_INDIRECTRD_START_MASK, + reg_base + CQSPI_REG_INDIRECTRD); + + while (remaining > 0) { + ret = wait_for_completion_timeout(&cqspi->transfer_complete, + msecs_to_jiffies + (CQSPI_READ_TIMEOUT_MS)); + + bytes_to_read = cqspi_get_rd_sram_level(cqspi); + + if (!ret && bytes_to_read == 0) { + dev_err(nor->dev, "Indirect read timeout, no bytes\n"); + ret = -ETIMEDOUT; + goto failrd; + } + + while (bytes_to_read != 0) { + bytes_to_read *= cqspi->fifo_width; + bytes_to_read = bytes_to_read > remaining ? + remaining : bytes_to_read; + readsl(ahb_base, rxbuf, DIV_ROUND_UP(bytes_to_read, 4)); + rxbuf += bytes_to_read; + remaining -= bytes_to_read; + bytes_to_read = cqspi_get_rd_sram_level(cqspi); + } + + if (remaining > 0) + reinit_completion(&cqspi->transfer_complete); + } + + /* Check indirect done status */ + ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTRD, + CQSPI_REG_INDIRECTRD_DONE_MASK, 0); + if (ret) { + dev_err(nor->dev, + "Indirect read completion error (%i)\n", ret); + goto failrd; + } + + /* Disable interrupt */ + writel(0, reg_base + CQSPI_REG_IRQMASK); + + /* Clear indirect completion status */ + writel(CQSPI_REG_INDIRECTRD_DONE_MASK, reg_base + CQSPI_REG_INDIRECTRD); + + return 0; + +failrd: + /* Disable interrupt */ + writel(0, reg_base + CQSPI_REG_IRQMASK); + + /* Cancel the indirect read */ + writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK, + reg_base + CQSPI_REG_INDIRECTRD); + return ret; +} + +static int cqspi_indirect_write_setup(struct spi_nor *nor, + const unsigned int to_addr) +{ + unsigned int reg; + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + + /* Set opcode. */ + reg = nor->program_opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB; + writel(reg, reg_base + CQSPI_REG_WR_INSTR); + reg = cqspi_calc_rdreg(nor, nor->program_opcode); + writel(reg, reg_base + CQSPI_REG_RD_INSTR); + + writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR); + + reg = readl(reg_base + CQSPI_REG_SIZE); + reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; + reg |= (nor->addr_width - 1); + writel(reg, reg_base + CQSPI_REG_SIZE); + return 0; +} + +static int cqspi_indirect_write_execute(struct spi_nor *nor, + const u8 *txbuf, const unsigned n_tx) +{ + const unsigned int page_size = nor->page_size; + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int remaining = n_tx; + unsigned int write_bytes; + int ret; + + writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES); + + /* Clear all interrupts. */ + writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS); + + writel(CQSPI_IRQ_MASK_WR, reg_base + CQSPI_REG_IRQMASK); + + reinit_completion(&cqspi->transfer_complete); + writel(CQSPI_REG_INDIRECTWR_START_MASK, + reg_base + CQSPI_REG_INDIRECTWR); + + while (remaining > 0) { + write_bytes = remaining > page_size ? page_size : remaining; + writesl(cqspi->ahb_base, txbuf, DIV_ROUND_UP(write_bytes, 4)); + + ret = wait_for_completion_timeout(&cqspi->transfer_complete, + msecs_to_jiffies + (CQSPI_TIMEOUT_MS)); + if (!ret) { + dev_err(nor->dev, "Indirect write timeout\n"); + ret = -ETIMEDOUT; + goto failwr; + } + + txbuf += write_bytes; + remaining -= write_bytes; + + if (remaining > 0) + reinit_completion(&cqspi->transfer_complete); + } + + /* Check indirect done status */ + ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTWR, + CQSPI_REG_INDIRECTWR_DONE_MASK, 0); + if (ret) { + dev_err(nor->dev, + "Indirect write completion error (%i)\n", ret); + goto failwr; + } + + /* Disable interrupt. */ + writel(0, reg_base + CQSPI_REG_IRQMASK); + + /* Clear indirect completion status */ + writel(CQSPI_REG_INDIRECTWR_DONE_MASK, reg_base + CQSPI_REG_INDIRECTWR); + + cqspi_wait_idle(cqspi); + + return 0; + +failwr: + /* Disable interrupt. */ + writel(0, reg_base + CQSPI_REG_IRQMASK); + + /* Cancel the indirect write */ + writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK, + reg_base + CQSPI_REG_INDIRECTWR); + return ret; +} + +static void cqspi_chipselect(struct spi_nor *nor) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + unsigned int chip_select = f_pdata->cs; + unsigned int reg; + + reg = readl(reg_base + CQSPI_REG_CONFIG); + if (cqspi->is_decoded_cs) { + reg |= CQSPI_REG_CONFIG_DECODE_MASK; + } else { + reg &= ~CQSPI_REG_CONFIG_DECODE_MASK; + + /* Convert CS if without decoder. + * CS0 to 4b'1110 + * CS1 to 4b'1101 + * CS2 to 4b'1011 + * CS3 to 4b'0111 + */ + chip_select = 0xF & ~(1 << chip_select); + } + + reg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK + << CQSPI_REG_CONFIG_CHIPSELECT_LSB); + reg |= (chip_select & CQSPI_REG_CONFIG_CHIPSELECT_MASK) + << CQSPI_REG_CONFIG_CHIPSELECT_LSB; + writel(reg, reg_base + CQSPI_REG_CONFIG); +} + +static void cqspi_configure_cs_and_sizes(struct spi_nor *nor) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *iobase = cqspi->iobase; + unsigned int reg; + + /* configure page size and block size. */ + reg = readl(iobase + CQSPI_REG_SIZE); + reg &= ~(CQSPI_REG_SIZE_PAGE_MASK << CQSPI_REG_SIZE_PAGE_LSB); + reg &= ~(CQSPI_REG_SIZE_BLOCK_MASK << CQSPI_REG_SIZE_BLOCK_LSB); + reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; + reg |= (nor->page_size << CQSPI_REG_SIZE_PAGE_LSB); + reg |= (ilog2(nor->mtd.erasesize) << CQSPI_REG_SIZE_BLOCK_LSB); + reg |= (nor->addr_width - 1); + writel(reg, iobase + CQSPI_REG_SIZE); + + /* configure the chip select */ + cqspi_chipselect(nor); + + /* Store the new configuration of the controller */ + cqspi->current_page_size = nor->page_size; + cqspi->current_erase_size = nor->mtd.erasesize; + cqspi->current_addr_width = nor->addr_width; +} + +static unsigned int calculate_ticks_for_ns(const unsigned int ref_clk_hz, + const unsigned int ns_val) +{ + unsigned int ticks; + + ticks = ref_clk_hz / 1000; /* kHz */ + ticks = DIV_ROUND_UP(ticks * ns_val, 1000000); + + return ticks; +} + +static void cqspi_delay(struct spi_nor *nor) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *iobase = cqspi->iobase; + const unsigned int ref_clk_hz = cqspi->master_ref_clk_hz; + unsigned int tshsl, tchsh, tslch, tsd2d; + unsigned int reg; + unsigned int tsclk; + + /* calculate the number of ref ticks for one sclk tick */ + tsclk = DIV_ROUND_UP(ref_clk_hz, cqspi->sclk); + + tshsl = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tshsl_ns); + /* this particular value must be at least one sclk */ + if (tshsl < tsclk) + tshsl = tsclk; + + tchsh = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tchsh_ns); + tslch = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tslch_ns); + tsd2d = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tsd2d_ns); + + reg = (tshsl & CQSPI_REG_DELAY_TSHSL_MASK) + << CQSPI_REG_DELAY_TSHSL_LSB; + reg |= (tchsh & CQSPI_REG_DELAY_TCHSH_MASK) + << CQSPI_REG_DELAY_TCHSH_LSB; + reg |= (tslch & CQSPI_REG_DELAY_TSLCH_MASK) + << CQSPI_REG_DELAY_TSLCH_LSB; + reg |= (tsd2d & CQSPI_REG_DELAY_TSD2D_MASK) + << CQSPI_REG_DELAY_TSD2D_LSB; + writel(reg, iobase + CQSPI_REG_DELAY); +} + +static void cqspi_config_baudrate_div(struct cqspi_st *cqspi) +{ + const unsigned int ref_clk_hz = cqspi->master_ref_clk_hz; + void __iomem *reg_base = cqspi->iobase; + u32 reg, div; + + /* Recalculate the baudrate divisor based on QSPI specification. */ + div = DIV_ROUND_UP(ref_clk_hz, 2 * cqspi->sclk) - 1; + + reg = readl(reg_base + CQSPI_REG_CONFIG); + reg &= ~(CQSPI_REG_CONFIG_BAUD_MASK << CQSPI_REG_CONFIG_BAUD_LSB); + reg |= (div & CQSPI_REG_CONFIG_BAUD_MASK) << CQSPI_REG_CONFIG_BAUD_LSB; + writel(reg, reg_base + CQSPI_REG_CONFIG); +} + +static void cqspi_readdata_capture(struct cqspi_st *cqspi, + const unsigned int bypass, + const unsigned int delay) +{ + void __iomem *reg_base = cqspi->iobase; + unsigned int reg; + + reg = readl(reg_base + CQSPI_REG_READCAPTURE); + + if (bypass) + reg |= (1 << CQSPI_REG_READCAPTURE_BYPASS_LSB); + else + reg &= ~(1 << CQSPI_REG_READCAPTURE_BYPASS_LSB); + + reg &= ~(CQSPI_REG_READCAPTURE_DELAY_MASK + << CQSPI_REG_READCAPTURE_DELAY_LSB); + + reg |= (delay & CQSPI_REG_READCAPTURE_DELAY_MASK) + << CQSPI_REG_READCAPTURE_DELAY_LSB; + + writel(reg, reg_base + CQSPI_REG_READCAPTURE); +} + +static void cqspi_controller_enable(struct cqspi_st *cqspi, bool enable) +{ + void __iomem *reg_base = cqspi->iobase; + unsigned int reg; + + reg = readl(reg_base + CQSPI_REG_CONFIG); + + if (enable) + reg |= CQSPI_REG_CONFIG_ENABLE_MASK; + else + reg &= ~CQSPI_REG_CONFIG_ENABLE_MASK; + + writel(reg, reg_base + CQSPI_REG_CONFIG); +} + +static void cqspi_configure(struct spi_nor *nor) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + const unsigned int sclk = f_pdata->clk_rate; + int switch_cs = (cqspi->current_cs != f_pdata->cs); + int switch_ck = (cqspi->sclk != sclk); + + if ((cqspi->current_page_size != nor->page_size) || + (cqspi->current_erase_size != nor->mtd.erasesize) || + (cqspi->current_addr_width != nor->addr_width)) + switch_cs = 1; + + if (switch_cs || switch_ck) + cqspi_controller_enable(cqspi, 0); + + /* Switch chip select. */ + if (switch_cs) { + cqspi->current_cs = f_pdata->cs; + cqspi_configure_cs_and_sizes(nor); + } + + /* Setup baudrate divisor and delays */ + if (switch_ck) { + cqspi->sclk = sclk; + cqspi_config_baudrate_div(cqspi); + cqspi_delay(nor); + cqspi_readdata_capture(cqspi, 1, f_pdata->read_delay); + } + + if (switch_cs || switch_ck) + cqspi_controller_enable(cqspi, 1); +} + +static int cqspi_set_protocol(struct spi_nor *nor, const int read) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + + f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; + f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; + f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; + + if (read) { + switch (nor->flash_read) { + case SPI_NOR_NORMAL: + case SPI_NOR_FAST: + f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; + break; + case SPI_NOR_DUAL: + f_pdata->data_width = CQSPI_INST_TYPE_DUAL; + break; + case SPI_NOR_QUAD: + f_pdata->data_width = CQSPI_INST_TYPE_QUAD; + break; + default: + return -EINVAL; + } + } + + cqspi_configure(nor); + + return 0; +} + +static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, + size_t len, const u_char *buf) +{ + int ret; + + ret = cqspi_set_protocol(nor, 0); + if (ret) + return ret; + + ret = cqspi_indirect_write_setup(nor, to); + if (ret) + return ret; + + ret = cqspi_indirect_write_execute(nor, buf, len); + if (ret) + return ret; + + return (ret < 0) ? ret : len; +} + +static ssize_t cqspi_read(struct spi_nor *nor, loff_t from, + size_t len, u_char *buf) +{ + int ret; + + ret = cqspi_set_protocol(nor, 1); + if (ret) + return ret; + + ret = cqspi_indirect_read_setup(nor, from); + if (ret) + return ret; + + ret = cqspi_indirect_read_execute(nor, buf, len); + if (ret) + return ret; + + return (ret < 0) ? ret : len; +} + +static int cqspi_erase(struct spi_nor *nor, loff_t offs) +{ + int ret; + + ret = cqspi_set_protocol(nor, 0); + if (ret) + return ret; + + /* Send write enable, then erase commands. */ + ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0); + if (ret) + return ret; + + /* Set up command buffer. */ + ret = cqspi_command_write_addr(nor, nor->erase_opcode, offs); + if (ret) + return ret; + + return 0; +} + +static int cqspi_prep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + + mutex_lock(&cqspi->bus_mutex); + + return 0; +} + +static void cqspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; + + mutex_unlock(&cqspi->bus_mutex); +} + +static int cqspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) +{ + int ret; + + ret = cqspi_set_protocol(nor, 0); + if (!ret) + ret = cqspi_command_read(nor, &opcode, 1, buf, len); + + return ret; +} + +static int cqspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) +{ + int ret; + + ret = cqspi_set_protocol(nor, 0); + if (!ret) + ret = cqspi_command_write(nor, opcode, buf, len); + + return ret; +} + +static int cqspi_of_get_flash_pdata(struct platform_device *pdev, + struct cqspi_flash_pdata *f_pdata, + struct device_node *np) +{ + if (of_property_read_u32(np, "cdns,read-delay", &f_pdata->read_delay)) { + dev_err(&pdev->dev, "couldn't determine read-delay\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,tshsl-ns", &f_pdata->tshsl_ns)) { + dev_err(&pdev->dev, "couldn't determine tshsl-ns\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,tsd2d-ns", &f_pdata->tsd2d_ns)) { + dev_err(&pdev->dev, "couldn't determine tsd2d-ns\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,tchsh-ns", &f_pdata->tchsh_ns)) { + dev_err(&pdev->dev, "couldn't determine tchsh-ns\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,tslch-ns", &f_pdata->tslch_ns)) { + dev_err(&pdev->dev, "couldn't determine tslch-ns\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "spi-max-frequency", &f_pdata->clk_rate)) { + dev_err(&pdev->dev, "couldn't determine spi-max-frequency\n"); + return -ENXIO; + } + + return 0; +} + +static int cqspi_of_get_pdata(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct cqspi_st *cqspi = platform_get_drvdata(pdev); + + cqspi->is_decoded_cs = of_property_read_bool(np, "cdns,is-decoded-cs"); + + if (of_property_read_u32(np, "cdns,fifo-depth", &cqspi->fifo_depth)) { + dev_err(&pdev->dev, "couldn't determine fifo-depth\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,fifo-width", &cqspi->fifo_width)) { + dev_err(&pdev->dev, "couldn't determine fifo-width\n"); + return -ENXIO; + } + + if (of_property_read_u32(np, "cdns,trigger-address", + &cqspi->trigger_address)) { + dev_err(&pdev->dev, "couldn't determine trigger-address\n"); + return -ENXIO; + } + + return 0; +} + +static void cqspi_controller_init(struct cqspi_st *cqspi) +{ + cqspi_controller_enable(cqspi, 0); + + /* Configure the remap address register, no remap */ + writel(0, cqspi->iobase + CQSPI_REG_REMAP); + + /* Disable all interrupts. */ + writel(0, cqspi->iobase + CQSPI_REG_IRQMASK); + + /* Configure the SRAM split to 1:1 . */ + writel(cqspi->fifo_depth / 2, cqspi->iobase + CQSPI_REG_SRAMPARTITION); + + /* Load indirect trigger address. */ + writel(cqspi->trigger_address, + cqspi->iobase + CQSPI_REG_INDIRECTTRIGGER); + + /* Program read watermark -- 1/2 of the FIFO. */ + writel(cqspi->fifo_depth * cqspi->fifo_width / 2, + cqspi->iobase + CQSPI_REG_INDIRECTRDWATERMARK); + /* Program write watermark -- 1/8 of the FIFO. */ + writel(cqspi->fifo_depth * cqspi->fifo_width / 8, + cqspi->iobase + CQSPI_REG_INDIRECTWRWATERMARK); + + cqspi_controller_enable(cqspi, 1); +} + +static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np) +{ + struct platform_device *pdev = cqspi->pdev; + struct device *dev = &pdev->dev; + struct cqspi_flash_pdata *f_pdata; + struct spi_nor *nor; + struct mtd_info *mtd; + unsigned int cs; + int i, ret; + + /* Get flash device data */ + for_each_available_child_of_node(dev->of_node, np) { + if (of_property_read_u32(np, "reg", &cs)) { + dev_err(dev, "Couldn't determine chip select.\n"); + goto err; + } + + if (cs > CQSPI_MAX_CHIPSELECT) { + dev_err(dev, "Chip select %d out of range.\n", cs); + goto err; + } + + f_pdata = &cqspi->f_pdata[cs]; + f_pdata->cqspi = cqspi; + f_pdata->cs = cs; + + ret = cqspi_of_get_flash_pdata(pdev, f_pdata, np); + if (ret) + goto err; + + nor = &f_pdata->nor; + mtd = &nor->mtd; + + mtd->priv = nor; + + nor->dev = dev; + spi_nor_set_flash_node(nor, np); + nor->priv = f_pdata; + + nor->read_reg = cqspi_read_reg; + nor->write_reg = cqspi_write_reg; + nor->read = cqspi_read; + nor->write = cqspi_write; + nor->erase = cqspi_erase; + nor->prepare = cqspi_prep; + nor->unprepare = cqspi_unprep; + + mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%s.%d", + dev_name(dev), cs); + if (!mtd->name) { + ret = -ENOMEM; + goto err; + } + + ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD); + if (ret) + goto err; + + ret = mtd_device_register(mtd, NULL, 0); + if (ret) + goto err; + + f_pdata->registered = true; + } + + return 0; + +err: + for (i = 0; i < CQSPI_MAX_CHIPSELECT; i++) + if (cqspi->f_pdata[i].registered) + mtd_device_unregister(&cqspi->f_pdata[i].nor.mtd); + return ret; +} + +static int cqspi_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct cqspi_st *cqspi; + struct resource *res; + struct resource *res_ahb; + int ret; + int irq; + + cqspi = devm_kzalloc(dev, sizeof(*cqspi), GFP_KERNEL); + if (!cqspi) + return -ENOMEM; + + mutex_init(&cqspi->bus_mutex); + cqspi->pdev = pdev; + platform_set_drvdata(pdev, cqspi); + + /* Obtain configuration from OF. */ + ret = cqspi_of_get_pdata(pdev); + if (ret) { + dev_err(dev, "Cannot get mandatory OF data.\n"); + return -ENODEV; + } + + /* Obtain QSPI clock. */ + cqspi->clk = devm_clk_get(dev, NULL); + if (IS_ERR(cqspi->clk)) { + dev_err(dev, "Cannot claim QSPI clock.\n"); + return PTR_ERR(cqspi->clk); + } + + /* Obtain and remap controller address. */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + cqspi->iobase = devm_ioremap_resource(dev, res); + if (IS_ERR(cqspi->iobase)) { + dev_err(dev, "Cannot remap controller address.\n"); + return PTR_ERR(cqspi->iobase); + } + + /* Obtain and remap AHB address. */ + res_ahb = platform_get_resource(pdev, IORESOURCE_MEM, 1); + cqspi->ahb_base = devm_ioremap_resource(dev, res_ahb); + if (IS_ERR(cqspi->ahb_base)) { + dev_err(dev, "Cannot remap AHB address.\n"); + return PTR_ERR(cqspi->ahb_base); + } + + init_completion(&cqspi->transfer_complete); + + /* Obtain IRQ line. */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "Cannot obtain IRQ.\n"); + return -ENXIO; + } + + ret = clk_prepare_enable(cqspi->clk); + if (ret) { + dev_err(dev, "Cannot enable QSPI clock.\n"); + return ret; + } + + cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk); + + ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, + pdev->name, cqspi); + if (ret) { + dev_err(dev, "Cannot request IRQ.\n"); + goto probe_irq_failed; + } + + cqspi_wait_idle(cqspi); + cqspi_controller_init(cqspi); + cqspi->current_cs = -1; + cqspi->sclk = 0; + + ret = cqspi_setup_flash(cqspi, np); + if (ret) { + dev_err(dev, "Cadence QSPI NOR probe failed %d\n", ret); + goto probe_setup_failed; + } + + return ret; +probe_irq_failed: + cqspi_controller_enable(cqspi, 0); +probe_setup_failed: + clk_disable_unprepare(cqspi->clk); + return ret; +} + +static int cqspi_remove(struct platform_device *pdev) +{ + struct cqspi_st *cqspi = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < CQSPI_MAX_CHIPSELECT; i++) + if (cqspi->f_pdata[i].registered) + mtd_device_unregister(&cqspi->f_pdata[i].nor.mtd); + + cqspi_controller_enable(cqspi, 0); + + clk_disable_unprepare(cqspi->clk); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cqspi_suspend(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + cqspi_controller_enable(cqspi, 0); + return 0; +} + +static int cqspi_resume(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + cqspi_controller_enable(cqspi, 1); + return 0; +} + +static const struct dev_pm_ops cqspi__dev_pm_ops = { + .suspend = cqspi_suspend, + .resume = cqspi_resume, +}; + +#define CQSPI_DEV_PM_OPS (&cqspi__dev_pm_ops) +#else +#define CQSPI_DEV_PM_OPS NULL +#endif + +static struct of_device_id const cqspi_dt_ids[] = { + {.compatible = "cdns,qspi-nor",}, + { /* end of table */ } +}; + +MODULE_DEVICE_TABLE(of, cqspi_dt_ids); + +static struct platform_driver cqspi_platform_driver = { + .probe = cqspi_probe, + .remove = cqspi_remove, + .driver = { + .name = CQSPI_NAME, + .pm = CQSPI_DEV_PM_OPS, + .of_match_table = cqspi_dt_ids, + }, +}; + +module_platform_driver(cqspi_platform_driver); + +MODULE_DESCRIPTION("Cadence QSPI Controller Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" CQSPI_NAME); +MODULE_AUTHOR("Ley Foon Tan <lftan@altera.com>"); +MODULE_AUTHOR("Graham Moore <grmoore@opensource.altera.com>"); diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index 9ab2b51d54b8..5c82e4ef1904 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -618,9 +618,9 @@ static inline void fsl_qspi_invalid(struct fsl_qspi *q) qspi_writel(q, reg, q->iobase + QUADSPI_MCR); } -static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor, +static ssize_t fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor, u8 opcode, unsigned int to, u32 *txbuf, - unsigned count, size_t *retlen) + unsigned count) { int ret, i, j; u32 tmp; @@ -647,8 +647,8 @@ static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor, /* Trigger it */ ret = fsl_qspi_runcmd(q, opcode, to, count); - if (ret == 0 && retlen) - *retlen += count; + if (ret == 0) + return count; return ret; } @@ -859,7 +859,9 @@ static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) } else if (len > 0) { ret = fsl_qspi_nor_write(q, nor, opcode, 0, - (u32 *)buf, len, NULL); + (u32 *)buf, len); + if (ret > 0) + return 0; } else { dev_err(q->dev, "invalid cmd %d\n", opcode); ret = -EINVAL; @@ -868,20 +870,20 @@ static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) return ret; } -static void fsl_qspi_write(struct spi_nor *nor, loff_t to, - size_t len, size_t *retlen, const u_char *buf) +static ssize_t fsl_qspi_write(struct spi_nor *nor, loff_t to, + size_t len, const u_char *buf) { struct fsl_qspi *q = nor->priv; - - fsl_qspi_nor_write(q, nor, nor->program_opcode, to, - (u32 *)buf, len, retlen); + ssize_t ret = fsl_qspi_nor_write(q, nor, nor->program_opcode, to, + (u32 *)buf, len); /* invalid the data in the AHB buffer. */ fsl_qspi_invalid(q); + return ret; } -static int fsl_qspi_read(struct spi_nor *nor, loff_t from, - size_t len, size_t *retlen, u_char *buf) +static ssize_t fsl_qspi_read(struct spi_nor *nor, loff_t from, + size_t len, u_char *buf) { struct fsl_qspi *q = nor->priv; u8 cmd = nor->read_opcode; @@ -923,8 +925,7 @@ static int fsl_qspi_read(struct spi_nor *nor, loff_t from, memcpy(buf, q->ahb_addr + q->chip_base_addr + from - q->memmap_offs, len); - *retlen += len; - return 0; + return len; } static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs) diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c new file mode 100644 index 000000000000..20378b0d55e9 --- /dev/null +++ b/drivers/mtd/spi-nor/hisi-sfc.c @@ -0,0 +1,489 @@ +/* + * HiSilicon SPI Nor Flash Controller Driver + * + * Copyright (c) 2015-2016 HiSilicon Technologies Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/spi-nor.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +/* Hardware register offsets and field definitions */ +#define FMC_CFG 0x00 +#define FMC_CFG_OP_MODE_MASK BIT_MASK(0) +#define FMC_CFG_OP_MODE_BOOT 0 +#define FMC_CFG_OP_MODE_NORMAL 1 +#define FMC_CFG_FLASH_SEL(type) (((type) & 0x3) << 1) +#define FMC_CFG_FLASH_SEL_MASK 0x6 +#define FMC_ECC_TYPE(type) (((type) & 0x7) << 5) +#define FMC_ECC_TYPE_MASK GENMASK(7, 5) +#define SPI_NOR_ADDR_MODE_MASK BIT_MASK(10) +#define SPI_NOR_ADDR_MODE_3BYTES (0x0 << 10) +#define SPI_NOR_ADDR_MODE_4BYTES (0x1 << 10) +#define FMC_GLOBAL_CFG 0x04 +#define FMC_GLOBAL_CFG_WP_ENABLE BIT(6) +#define FMC_SPI_TIMING_CFG 0x08 +#define TIMING_CFG_TCSH(nr) (((nr) & 0xf) << 8) +#define TIMING_CFG_TCSS(nr) (((nr) & 0xf) << 4) +#define TIMING_CFG_TSHSL(nr) ((nr) & 0xf) +#define CS_HOLD_TIME 0x6 +#define CS_SETUP_TIME 0x6 +#define CS_DESELECT_TIME 0xf +#define FMC_INT 0x18 +#define FMC_INT_OP_DONE BIT(0) +#define FMC_INT_CLR 0x20 +#define FMC_CMD 0x24 +#define FMC_CMD_CMD1(cmd) ((cmd) & 0xff) +#define FMC_ADDRL 0x2c +#define FMC_OP_CFG 0x30 +#define OP_CFG_FM_CS(cs) ((cs) << 11) +#define OP_CFG_MEM_IF_TYPE(type) (((type) & 0x7) << 7) +#define OP_CFG_ADDR_NUM(addr) (((addr) & 0x7) << 4) +#define OP_CFG_DUMMY_NUM(dummy) ((dummy) & 0xf) +#define FMC_DATA_NUM 0x38 +#define FMC_DATA_NUM_CNT(cnt) ((cnt) & GENMASK(13, 0)) +#define FMC_OP 0x3c +#define FMC_OP_DUMMY_EN BIT(8) +#define FMC_OP_CMD1_EN BIT(7) +#define FMC_OP_ADDR_EN BIT(6) +#define FMC_OP_WRITE_DATA_EN BIT(5) +#define FMC_OP_READ_DATA_EN BIT(2) +#define FMC_OP_READ_STATUS_EN BIT(1) +#define FMC_OP_REG_OP_START BIT(0) +#define FMC_DMA_LEN 0x40 +#define FMC_DMA_LEN_SET(len) ((len) & GENMASK(27, 0)) +#define FMC_DMA_SADDR_D0 0x4c +#define HIFMC_DMA_MAX_LEN (4096) +#define HIFMC_DMA_MASK (HIFMC_DMA_MAX_LEN - 1) +#define FMC_OP_DMA 0x68 +#define OP_CTRL_RD_OPCODE(code) (((code) & 0xff) << 16) +#define OP_CTRL_WR_OPCODE(code) (((code) & 0xff) << 8) +#define OP_CTRL_RW_OP(op) ((op) << 1) +#define OP_CTRL_DMA_OP_READY BIT(0) +#define FMC_OP_READ 0x0 +#define FMC_OP_WRITE 0x1 +#define FMC_WAIT_TIMEOUT 1000000 + +enum hifmc_iftype { + IF_TYPE_STD, + IF_TYPE_DUAL, + IF_TYPE_DIO, + IF_TYPE_QUAD, + IF_TYPE_QIO, +}; + +struct hifmc_priv { + u32 chipselect; + u32 clkrate; + struct hifmc_host *host; +}; + +#define HIFMC_MAX_CHIP_NUM 2 +struct hifmc_host { + struct device *dev; + struct mutex lock; + + void __iomem *regbase; + void __iomem *iobase; + struct clk *clk; + void *buffer; + dma_addr_t dma_buffer; + + struct spi_nor *nor[HIFMC_MAX_CHIP_NUM]; + u32 num_chip; +}; + +static inline int wait_op_finish(struct hifmc_host *host) +{ + u32 reg; + + return readl_poll_timeout(host->regbase + FMC_INT, reg, + (reg & FMC_INT_OP_DONE), 0, FMC_WAIT_TIMEOUT); +} + +static int get_if_type(enum read_mode flash_read) +{ + enum hifmc_iftype if_type; + + switch (flash_read) { + case SPI_NOR_DUAL: + if_type = IF_TYPE_DUAL; + break; + case SPI_NOR_QUAD: + if_type = IF_TYPE_QUAD; + break; + case SPI_NOR_NORMAL: + case SPI_NOR_FAST: + default: + if_type = IF_TYPE_STD; + break; + } + + return if_type; +} + +static void hisi_spi_nor_init(struct hifmc_host *host) +{ + u32 reg; + + reg = TIMING_CFG_TCSH(CS_HOLD_TIME) + | TIMING_CFG_TCSS(CS_SETUP_TIME) + | TIMING_CFG_TSHSL(CS_DESELECT_TIME); + writel(reg, host->regbase + FMC_SPI_TIMING_CFG); +} + +static int hisi_spi_nor_prep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + int ret; + + mutex_lock(&host->lock); + + ret = clk_set_rate(host->clk, priv->clkrate); + if (ret) + goto out; + + ret = clk_prepare_enable(host->clk); + if (ret) + goto out; + + return 0; + +out: + mutex_unlock(&host->lock); + return ret; +} + +static void hisi_spi_nor_unprep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + + clk_disable_unprepare(host->clk); + mutex_unlock(&host->lock); +} + +static int hisi_spi_nor_op_reg(struct spi_nor *nor, + u8 opcode, int len, u8 optype) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + u32 reg; + + reg = FMC_CMD_CMD1(opcode); + writel(reg, host->regbase + FMC_CMD); + + reg = FMC_DATA_NUM_CNT(len); + writel(reg, host->regbase + FMC_DATA_NUM); + + reg = OP_CFG_FM_CS(priv->chipselect); + writel(reg, host->regbase + FMC_OP_CFG); + + writel(0xff, host->regbase + FMC_INT_CLR); + reg = FMC_OP_CMD1_EN | FMC_OP_REG_OP_START | optype; + writel(reg, host->regbase + FMC_OP); + + return wait_op_finish(host); +} + +static int hisi_spi_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, + int len) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + int ret; + + ret = hisi_spi_nor_op_reg(nor, opcode, len, FMC_OP_READ_DATA_EN); + if (ret) + return ret; + + memcpy_fromio(buf, host->iobase, len); + return 0; +} + +static int hisi_spi_nor_write_reg(struct spi_nor *nor, u8 opcode, + u8 *buf, int len) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + + if (len) + memcpy_toio(host->iobase, buf, len); + + return hisi_spi_nor_op_reg(nor, opcode, len, FMC_OP_WRITE_DATA_EN); +} + +static int hisi_spi_nor_dma_transfer(struct spi_nor *nor, loff_t start_off, + dma_addr_t dma_buf, size_t len, u8 op_type) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + u8 if_type = 0; + u32 reg; + + reg = readl(host->regbase + FMC_CFG); + reg &= ~(FMC_CFG_OP_MODE_MASK | SPI_NOR_ADDR_MODE_MASK); + reg |= FMC_CFG_OP_MODE_NORMAL; + reg |= (nor->addr_width == 4) ? SPI_NOR_ADDR_MODE_4BYTES + : SPI_NOR_ADDR_MODE_3BYTES; + writel(reg, host->regbase + FMC_CFG); + + writel(start_off, host->regbase + FMC_ADDRL); + writel(dma_buf, host->regbase + FMC_DMA_SADDR_D0); + writel(FMC_DMA_LEN_SET(len), host->regbase + FMC_DMA_LEN); + + reg = OP_CFG_FM_CS(priv->chipselect); + if_type = get_if_type(nor->flash_read); + reg |= OP_CFG_MEM_IF_TYPE(if_type); + if (op_type == FMC_OP_READ) + reg |= OP_CFG_DUMMY_NUM(nor->read_dummy >> 3); + writel(reg, host->regbase + FMC_OP_CFG); + + writel(0xff, host->regbase + FMC_INT_CLR); + reg = OP_CTRL_RW_OP(op_type) | OP_CTRL_DMA_OP_READY; + reg |= (op_type == FMC_OP_READ) + ? OP_CTRL_RD_OPCODE(nor->read_opcode) + : OP_CTRL_WR_OPCODE(nor->program_opcode); + writel(reg, host->regbase + FMC_OP_DMA); + + return wait_op_finish(host); +} + +static ssize_t hisi_spi_nor_read(struct spi_nor *nor, loff_t from, size_t len, + u_char *read_buf) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + size_t offset; + int ret; + + for (offset = 0; offset < len; offset += HIFMC_DMA_MAX_LEN) { + size_t trans = min_t(size_t, HIFMC_DMA_MAX_LEN, len - offset); + + ret = hisi_spi_nor_dma_transfer(nor, + from + offset, host->dma_buffer, trans, FMC_OP_READ); + if (ret) { + dev_warn(nor->dev, "DMA read timeout\n"); + return ret; + } + memcpy(read_buf + offset, host->buffer, trans); + } + + return len; +} + +static ssize_t hisi_spi_nor_write(struct spi_nor *nor, loff_t to, + size_t len, const u_char *write_buf) +{ + struct hifmc_priv *priv = nor->priv; + struct hifmc_host *host = priv->host; + size_t offset; + int ret; + + for (offset = 0; offset < len; offset += HIFMC_DMA_MAX_LEN) { + size_t trans = min_t(size_t, HIFMC_DMA_MAX_LEN, len - offset); + + memcpy(host->buffer, write_buf + offset, trans); + ret = hisi_spi_nor_dma_transfer(nor, + to + offset, host->dma_buffer, trans, FMC_OP_WRITE); + if (ret) { + dev_warn(nor->dev, "DMA write timeout\n"); + return ret; + } + } + + return len; +} + +/** + * Get spi flash device information and register it as a mtd device. + */ +static int hisi_spi_nor_register(struct device_node *np, + struct hifmc_host *host) +{ + struct device *dev = host->dev; + struct spi_nor *nor; + struct hifmc_priv *priv; + struct mtd_info *mtd; + int ret; + + nor = devm_kzalloc(dev, sizeof(*nor), GFP_KERNEL); + if (!nor) + return -ENOMEM; + + nor->dev = dev; + spi_nor_set_flash_node(nor, np); + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = of_property_read_u32(np, "reg", &priv->chipselect); + if (ret) { + dev_err(dev, "There's no reg property for %s\n", + np->full_name); + return ret; + } + + ret = of_property_read_u32(np, "spi-max-frequency", + &priv->clkrate); + if (ret) { + dev_err(dev, "There's no spi-max-frequency property for %s\n", + np->full_name); + return ret; + } + priv->host = host; + nor->priv = priv; + + nor->prepare = hisi_spi_nor_prep; + nor->unprepare = hisi_spi_nor_unprep; + nor->read_reg = hisi_spi_nor_read_reg; + nor->write_reg = hisi_spi_nor_write_reg; + nor->read = hisi_spi_nor_read; + nor->write = hisi_spi_nor_write; + nor->erase = NULL; + ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD); + if (ret) + return ret; + + mtd = &nor->mtd; + mtd->name = np->name; + ret = mtd_device_register(mtd, NULL, 0); + if (ret) + return ret; + + host->nor[host->num_chip] = nor; + host->num_chip++; + return 0; +} + +static void hisi_spi_nor_unregister_all(struct hifmc_host *host) +{ + int i; + + for (i = 0; i < host->num_chip; i++) + mtd_device_unregister(&host->nor[i]->mtd); +} + +static int hisi_spi_nor_register_all(struct hifmc_host *host) +{ + struct device *dev = host->dev; + struct device_node *np; + int ret; + + for_each_available_child_of_node(dev->of_node, np) { + ret = hisi_spi_nor_register(np, host); + if (ret) + goto fail; + + if (host->num_chip == HIFMC_MAX_CHIP_NUM) { + dev_warn(dev, "Flash device number exceeds the maximum chipselect number\n"); + break; + } + } + + return 0; + +fail: + hisi_spi_nor_unregister_all(host); + return ret; +} + +static int hisi_spi_nor_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct hifmc_host *host; + int ret; + + host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL); + if (!host) + return -ENOMEM; + + platform_set_drvdata(pdev, host); + host->dev = dev; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "control"); + host->regbase = devm_ioremap_resource(dev, res); + if (IS_ERR(host->regbase)) + return PTR_ERR(host->regbase); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "memory"); + host->iobase = devm_ioremap_resource(dev, res); + if (IS_ERR(host->iobase)) + return PTR_ERR(host->iobase); + + host->clk = devm_clk_get(dev, NULL); + if (IS_ERR(host->clk)) + return PTR_ERR(host->clk); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_warn(dev, "Unable to set dma mask\n"); + return ret; + } + + host->buffer = dmam_alloc_coherent(dev, HIFMC_DMA_MAX_LEN, + &host->dma_buffer, GFP_KERNEL); + if (!host->buffer) + return -ENOMEM; + + mutex_init(&host->lock); + clk_prepare_enable(host->clk); + hisi_spi_nor_init(host); + ret = hisi_spi_nor_register_all(host); + if (ret) + mutex_destroy(&host->lock); + + clk_disable_unprepare(host->clk); + return ret; +} + +static int hisi_spi_nor_remove(struct platform_device *pdev) +{ + struct hifmc_host *host = platform_get_drvdata(pdev); + + hisi_spi_nor_unregister_all(host); + mutex_destroy(&host->lock); + clk_disable_unprepare(host->clk); + return 0; +} + +static const struct of_device_id hisi_spi_nor_dt_ids[] = { + { .compatible = "hisilicon,fmc-spi-nor"}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, hisi_spi_nor_dt_ids); + +static struct platform_driver hisi_spi_nor_driver = { + .driver = { + .name = "hisi-sfc", + .of_match_table = hisi_spi_nor_dt_ids, + }, + .probe = hisi_spi_nor_probe, + .remove = hisi_spi_nor_remove, +}; +module_platform_driver(hisi_spi_nor_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("HiSilicon SPI Nor Flash Controller Driver"); diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c index 8bed1a4cb79c..e661877c23de 100644 --- a/drivers/mtd/spi-nor/mtk-quadspi.c +++ b/drivers/mtd/spi-nor/mtk-quadspi.c @@ -21,7 +21,6 @@ #include <linux/ioport.h> #include <linux/math64.h> #include <linux/module.h> -#include <linux/mtd/mtd.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_device.h> @@ -243,8 +242,8 @@ static void mt8173_nor_set_addr(struct mt8173_nor *mt8173_nor, u32 addr) writeb(addr & 0xff, mt8173_nor->base + MTK_NOR_RADR3_REG); } -static int mt8173_nor_read(struct spi_nor *nor, loff_t from, size_t length, - size_t *retlen, u_char *buffer) +static ssize_t mt8173_nor_read(struct spi_nor *nor, loff_t from, size_t length, + u_char *buffer) { int i, ret; int addr = (int)from; @@ -255,13 +254,13 @@ static int mt8173_nor_read(struct spi_nor *nor, loff_t from, size_t length, mt8173_nor_set_read_mode(mt8173_nor); mt8173_nor_set_addr(mt8173_nor, addr); - for (i = 0; i < length; i++, (*retlen)++) { + for (i = 0; i < length; i++) { ret = mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_PIO_READ_CMD); if (ret < 0) return ret; buf[i] = readb(mt8173_nor->base + MTK_NOR_RDATA_REG); } - return 0; + return length; } static int mt8173_nor_write_single_byte(struct mt8173_nor *mt8173_nor, @@ -297,36 +296,44 @@ static int mt8173_nor_write_buffer(struct mt8173_nor *mt8173_nor, int addr, return mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_WR_CMD); } -static void mt8173_nor_write(struct spi_nor *nor, loff_t to, size_t len, - size_t *retlen, const u_char *buf) +static ssize_t mt8173_nor_write(struct spi_nor *nor, loff_t to, size_t len, + const u_char *buf) { int ret; struct mt8173_nor *mt8173_nor = nor->priv; + size_t i; ret = mt8173_nor_write_buffer_enable(mt8173_nor); - if (ret < 0) + if (ret < 0) { dev_warn(mt8173_nor->dev, "write buffer enable failed!\n"); + return ret; + } - while (len >= SFLASH_WRBUF_SIZE) { + for (i = 0; i + SFLASH_WRBUF_SIZE <= len; i += SFLASH_WRBUF_SIZE) { ret = mt8173_nor_write_buffer(mt8173_nor, to, buf); - if (ret < 0) + if (ret < 0) { dev_err(mt8173_nor->dev, "write buffer failed!\n"); - len -= SFLASH_WRBUF_SIZE; + return ret; + } to += SFLASH_WRBUF_SIZE; buf += SFLASH_WRBUF_SIZE; - (*retlen) += SFLASH_WRBUF_SIZE; } ret = mt8173_nor_write_buffer_disable(mt8173_nor); - if (ret < 0) + if (ret < 0) { dev_warn(mt8173_nor->dev, "write buffer disable failed!\n"); + return ret; + } - if (len) { - ret = mt8173_nor_write_single_byte(mt8173_nor, to, (int)len, - (u8 *)buf); - if (ret < 0) + if (i < len) { + ret = mt8173_nor_write_single_byte(mt8173_nor, to, + (int)(len - i), (u8 *)buf); + if (ret < 0) { dev_err(mt8173_nor->dev, "write single byte failed!\n"); - (*retlen) += len; + return ret; + } } + + return len; } static int mt8173_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c index ae428cb0e04b..73a14f40928b 100644 --- a/drivers/mtd/spi-nor/nxp-spifi.c +++ b/drivers/mtd/spi-nor/nxp-spifi.c @@ -172,8 +172,8 @@ static int nxp_spifi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) return nxp_spifi_wait_for_cmd(spifi); } -static int nxp_spifi_read(struct spi_nor *nor, loff_t from, size_t len, - size_t *retlen, u_char *buf) +static ssize_t nxp_spifi_read(struct spi_nor *nor, loff_t from, size_t len, + u_char *buf) { struct nxp_spifi *spifi = nor->priv; int ret; @@ -183,24 +183,23 @@ static int nxp_spifi_read(struct spi_nor *nor, loff_t from, size_t len, return ret; memcpy_fromio(buf, spifi->flash_base + from, len); - *retlen += len; - return 0; + return len; } -static void nxp_spifi_write(struct spi_nor *nor, loff_t to, size_t len, - size_t *retlen, const u_char *buf) +static ssize_t nxp_spifi_write(struct spi_nor *nor, loff_t to, size_t len, + const u_char *buf) { struct nxp_spifi *spifi = nor->priv; u32 cmd; int ret; + size_t i; ret = nxp_spifi_set_memory_mode_off(spifi); if (ret) - return; + return ret; writel(to, spifi->io_base + SPIFI_ADDR); - *retlen += len; cmd = SPIFI_CMD_DOUT | SPIFI_CMD_DATALEN(len) | @@ -209,10 +208,14 @@ static void nxp_spifi_write(struct spi_nor *nor, loff_t to, size_t len, SPIFI_CMD_FRAMEFORM(spifi->nor.addr_width + 1); writel(cmd, spifi->io_base + SPIFI_CMD); - while (len--) - writeb(*buf++, spifi->io_base + SPIFI_DATA); + for (i = 0; i < len; i++) + writeb(buf[i], spifi->io_base + SPIFI_DATA); + + ret = nxp_spifi_wait_for_cmd(spifi); + if (ret) + return ret; - nxp_spifi_wait_for_cmd(spifi); + return len; } static int nxp_spifi_erase(struct spi_nor *nor, loff_t offs) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index c52e45594bfd..d0fc165d7d66 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -661,7 +661,7 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) status_new = (status_old & ~mask & ~SR_TB) | val; /* Don't protect status register if we're fully unlocked */ - if (lock_len == mtd->size) + if (lock_len == 0) status_new &= ~SR_SRWD; if (!use_top) @@ -830,10 +830,26 @@ static const struct flash_info spi_nor_ids[] = { { "mb85rs1mt", INFO(0x047f27, 0, 128 * 1024, 1, SPI_NOR_NO_ERASE) }, /* GigaDevice */ - { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, - { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, - { "gd25lq64c", INFO(0xc86017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, - { "gd25q128", INFO(0xc84018, 0, 64 * 1024, 256, SECT_4K) }, + { + "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + }, + { + "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + }, + { + "gd25lq64c", INFO(0xc86017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + }, + { + "gd25q128", INFO(0xc84018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + }, /* Intel/Numonyx -- xxxs33b */ { "160s33b", INFO(0x898911, 0, 64 * 1024, 32, 0) }, @@ -871,6 +887,7 @@ static const struct flash_info spi_nor_ids[] = { { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, + { "n25q00a", INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, /* PMC */ { "pm25lv512", INFO(0, 0, 32 * 1024, 2, SECT_4K_PMC) }, @@ -1031,8 +1048,25 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, if (ret) return ret; - ret = nor->read(nor, from, len, retlen, buf); + while (len) { + ret = nor->read(nor, from, len, buf); + if (ret == 0) { + /* We shouldn't see 0-length reads */ + ret = -EIO; + goto read_err; + } + if (ret < 0) + goto read_err; + + WARN_ON(ret > len); + *retlen += ret; + buf += ret; + from += ret; + len -= ret; + } + ret = 0; +read_err: spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_READ); return ret; } @@ -1060,10 +1094,14 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->program_opcode = SPINOR_OP_BP; /* write one byte. */ - nor->write(nor, to, 1, retlen, buf); + ret = nor->write(nor, to, 1, buf); + if (ret < 0) + goto sst_write_err; + WARN(ret != 1, "While writing 1 byte written %i bytes\n", + (int)ret); ret = spi_nor_wait_till_ready(nor); if (ret) - goto time_out; + goto sst_write_err; } to += actual; @@ -1072,10 +1110,14 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, nor->program_opcode = SPINOR_OP_AAI_WP; /* write two bytes. */ - nor->write(nor, to, 2, retlen, buf + actual); + ret = nor->write(nor, to, 2, buf + actual); + if (ret < 0) + goto sst_write_err; + WARN(ret != 2, "While writing 2 bytes written %i bytes\n", + (int)ret); ret = spi_nor_wait_till_ready(nor); if (ret) - goto time_out; + goto sst_write_err; to += 2; nor->sst_write_second = true; } @@ -1084,21 +1126,26 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, write_disable(nor); ret = spi_nor_wait_till_ready(nor); if (ret) - goto time_out; + goto sst_write_err; /* Write out trailing byte if it exists. */ if (actual != len) { write_enable(nor); nor->program_opcode = SPINOR_OP_BP; - nor->write(nor, to, 1, retlen, buf + actual); - + ret = nor->write(nor, to, 1, buf + actual); + if (ret < 0) + goto sst_write_err; + WARN(ret != 1, "While writing 1 byte written %i bytes\n", + (int)ret); ret = spi_nor_wait_till_ready(nor); if (ret) - goto time_out; + goto sst_write_err; write_disable(nor); + actual += 1; } -time_out: +sst_write_err: + *retlen += actual; spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); return ret; } @@ -1112,8 +1159,8 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct spi_nor *nor = mtd_to_spi_nor(mtd); - u32 page_offset, page_size, i; - int ret; + size_t page_offset, page_remain, i; + ssize_t ret; dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len); @@ -1121,35 +1168,37 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, if (ret) return ret; - write_enable(nor); - - page_offset = to & (nor->page_size - 1); + for (i = 0; i < len; ) { + ssize_t written; - /* do all the bytes fit onto one page? */ - if (page_offset + len <= nor->page_size) { - nor->write(nor, to, len, retlen, buf); - } else { + page_offset = (to + i) & (nor->page_size - 1); + WARN_ONCE(page_offset, + "Writing at offset %zu into a NOR page. Writing partial pages may decrease reliability and increase wear of NOR flash.", + page_offset); /* the size of data remaining on the first page */ - page_size = nor->page_size - page_offset; - nor->write(nor, to, page_size, retlen, buf); - - /* write everything in nor->page_size chunks */ - for (i = page_size; i < len; i += page_size) { - page_size = len - i; - if (page_size > nor->page_size) - page_size = nor->page_size; + page_remain = min_t(size_t, + nor->page_size - page_offset, len - i); - ret = spi_nor_wait_till_ready(nor); - if (ret) - goto write_err; - - write_enable(nor); + write_enable(nor); + ret = nor->write(nor, to + i, page_remain, buf + i); + if (ret < 0) + goto write_err; + written = ret; - nor->write(nor, to + i, page_size, retlen, buf + i); + ret = spi_nor_wait_till_ready(nor); + if (ret) + goto write_err; + *retlen += written; + i += written; + if (written != page_remain) { + dev_err(nor->dev, + "While writing %zu bytes written %zd bytes\n", + page_remain, written); + ret = -EIO; + goto write_err; } } - ret = spi_nor_wait_till_ready(nor); write_err: spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); return ret; diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index daf82ba7aba0..41b13d1cdcc4 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -380,8 +380,7 @@ static int ssfdcr_readsect(struct mtd_blktrans_dev *dev, " block_addr=%d\n", logic_sect_no, sectors_per_block, offset, block_address); - if (block_address >= ssfdc->map_len) - BUG(); + BUG_ON(block_address >= ssfdc->map_len); block_address = ssfdc->logic_block_map[block_address]; diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c index 09a4ccac53a2..f26dec896afa 100644 --- a/drivers/mtd/tests/nandbiterrs.c +++ b/drivers/mtd/tests/nandbiterrs.c @@ -290,7 +290,7 @@ static int overwrite_test(void) while (opno < max_overwrite) { - err = rewrite_page(0); + err = write_page(0); if (err) break; diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index c1aaf0336cf2..903becd31410 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -175,6 +175,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) } /** + * add_fastmap - add a Fastmap related physical eraseblock. + * @ai: attaching information + * @pnum: physical eraseblock number the VID header came from + * @vid_hdr: the volume identifier header + * @ec: erase counter of the physical eraseblock + * + * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp + * physical eraseblock @pnum and adds it to the 'fastmap' list. + * Such blocks can be Fastmap super and data blocks from both the most + * recent Fastmap we're attaching from or from old Fastmaps which will + * be erased. + */ +static int add_fastmap(struct ubi_attach_info *ai, int pnum, + struct ubi_vid_hdr *vid_hdr, int ec) +{ + struct ubi_ainf_peb *aeb; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->vol_id = be32_to_cpu(vidh->vol_id); + aeb->sqnum = be64_to_cpu(vidh->sqnum); + aeb->ec = ec; + list_add(&aeb->u.list, &ai->fastmap); + + dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum, + aeb->vol_id, aeb->sqnum); + + return 0; +} + +/** * validate_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @vid_hdr: the volume identifier header to check @@ -803,13 +837,26 @@ out_unlock: return err; } +static bool vol_ignored(int vol_id) +{ + switch (vol_id) { + case UBI_LAYOUT_VOLUME_ID: + return true; + } + +#ifdef CONFIG_MTD_UBI_FASTMAP + return ubi_is_fm_vol(vol_id); +#else + return false; +#endif +} + /** * scan_peb - scan and process UBI headers of a PEB. * @ubi: UBI device description object * @ai: attaching information * @pnum: the physical eraseblock number - * @vid: The volume ID of the found volume will be stored in this pointer - * @sqnum: The sqnum of the found volume will be stored in this pointer + * @fast: true if we're scanning for a Fastmap * * This function reads UBI headers of PEB @pnum, checks them, and adds * information about this PEB to the corresponding list or RB-tree in the @@ -817,9 +864,9 @@ out_unlock: * successfully handled and a negative error code in case of failure. */ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, - int pnum, int *vid, unsigned long long *sqnum) + int pnum, bool fast) { - long long uninitialized_var(ec); + long long ec; int err, bitflips = 0, vol_id = -1, ec_err = 0; dbg_bld("scan PEB %d", pnum); @@ -935,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, */ ai->maybe_bad_peb_count += 1; case UBI_IO_BAD_HDR: + /* + * If we're facing a bad VID header we have to drop *all* + * Fastmap data structures we find. The most recent Fastmap + * could be bad and therefore there is a chance that we attach + * from an old one. On a fine MTD stack a PEB must not render + * bad all of a sudden, but the reality is different. + * So, let's be paranoid and help finding the root cause by + * falling back to scanning mode instead of attaching with a + * bad EBA table and cause data corruption which is hard to + * analyze. + */ + if (fast) + ai->force_full_scan = 1; + if (ec_err) /* * Both headers are corrupted. There is a possibility @@ -991,21 +1052,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, } vol_id = be32_to_cpu(vidh->vol_id); - if (vid) - *vid = vol_id; - if (sqnum) - *sqnum = be64_to_cpu(vidh->sqnum); - if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) { int lnum = be32_to_cpu(vidh->lnum); /* Unsupported internal volume */ switch (vidh->compat) { case UBI_COMPAT_DELETE: - if (vol_id != UBI_FM_SB_VOLUME_ID - && vol_id != UBI_FM_DATA_VOLUME_ID) { - ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", - vol_id, lnum); - } + ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", + vol_id, lnum); + err = add_to_list(ai, pnum, vol_id, lnum, ec, 1, &ai->erase); if (err) @@ -1037,7 +1092,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, if (ec_err) ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d", pnum); - err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + + if (ubi_is_fm_vol(vol_id)) + err = add_fastmap(ai, pnum, vidh, ec); + else + err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + if (err) return err; @@ -1186,6 +1246,10 @@ static void destroy_ai(struct ubi_attach_info *ai) list_del(&aeb->u.list); kmem_cache_free(ai->aeb_slab_cache, aeb); } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } /* Destroy the volume RB-tree */ rb = ai->volumes.rb_node; @@ -1245,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, ai, pnum, NULL, NULL); + err = scan_peb(ubi, ai, pnum, false); if (err < 0) goto out_vidh; } @@ -1311,6 +1375,7 @@ static struct ubi_attach_info *alloc_ai(void) INIT_LIST_HEAD(&ai->free); INIT_LIST_HEAD(&ai->erase); INIT_LIST_HEAD(&ai->alien); + INIT_LIST_HEAD(&ai->fastmap); ai->volumes = RB_ROOT; ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", sizeof(struct ubi_ainf_peb), @@ -1326,7 +1391,7 @@ static struct ubi_attach_info *alloc_ai(void) #ifdef CONFIG_MTD_UBI_FASTMAP /** - * scan_fastmap - try to find a fastmap and attach from it. + * scan_fast - try to find a fastmap and attach from it. * @ubi: UBI device description object * @ai: attach info object * @@ -1337,52 +1402,58 @@ static struct ubi_attach_info *alloc_ai(void) */ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) { - int err, pnum, fm_anchor = -1; - unsigned long long max_sqnum = 0; + int err, pnum; + struct ubi_attach_info *scan_ai; err = -ENOMEM; + scan_ai = alloc_ai(); + if (!scan_ai) + goto out; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); if (!ech) - goto out; + goto out_ai; vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vidh) goto out_ech; for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { - int vol_id = -1; - unsigned long long sqnum = -1; cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum); + err = scan_peb(ubi, scan_ai, pnum, true); if (err < 0) goto out_vidh; - - if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { - max_sqnum = sqnum; - fm_anchor = pnum; - } } ubi_free_vid_hdr(ubi, vidh); kfree(ech); - if (fm_anchor < 0) - return UBI_NO_FASTMAP; + if (scan_ai->force_full_scan) + err = UBI_NO_FASTMAP; + else + err = ubi_scan_fastmap(ubi, *ai, scan_ai); - destroy_ai(*ai); - *ai = alloc_ai(); - if (!*ai) - return -ENOMEM; + if (err) { + /* + * Didn't attach via fastmap, do a full scan but reuse what + * we've aready scanned. + */ + destroy_ai(*ai); + *ai = scan_ai; + } else + destroy_ai(scan_ai); - return ubi_scan_fastmap(ubi, *ai, fm_anchor); + return err; out_vidh: ubi_free_vid_hdr(ubi, vidh); out_ech: kfree(ech); +out_ai: + destroy_ai(scan_ai); out: return err; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index ef3618299494..0680516bb472 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -874,7 +874,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; if (ubi && mtd->index == ubi->mtd->index) { - ubi_err(ubi, "mtd%d is already attached to ubi%d", + pr_err("ubi: mtd%d is already attached to ubi%d", mtd->index, i); return -EEXIST; } @@ -889,7 +889,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, * no sense to attach emulated MTD devices, so we prohibit this. */ if (mtd->type == MTD_UBIVOLUME) { - ubi_err(ubi, "refuse attaching mtd%d - it is already emulated on top of UBI", + pr_err("ubi: refuse attaching mtd%d - it is already emulated on top of UBI", mtd->index); return -EINVAL; } @@ -900,7 +900,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, if (!ubi_devices[ubi_num]) break; if (ubi_num == UBI_MAX_DEVICES) { - ubi_err(ubi, "only %d UBI devices may be created", + pr_err("ubi: only %d UBI devices may be created", UBI_MAX_DEVICES); return -ENFILE; } @@ -910,7 +910,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, /* Make sure ubi_num is not busy */ if (ubi_devices[ubi_num]) { - ubi_err(ubi, "already exists"); + pr_err("ubi: ubi%i already exists", ubi_num); return -EEXIST; } } @@ -992,6 +992,9 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } + /* Make device "available" before it becomes accessible via sysfs */ + ubi_devices[ubi_num] = ubi; + err = uif_init(ubi, &ref); if (err) goto out_detach; @@ -1036,7 +1039,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); - ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1047,6 +1049,7 @@ out_uif: ubi_assert(ref); uif_close(ubi); out_detach: + ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 990898b9dc72..48eb55f344eb 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -15,20 +15,22 @@ */ #include <linux/crc32.h> +#include <linux/bitmap.h> #include "ubi.h" /** * init_seen - allocate memory for used for debugging. * @ubi: UBI device description object */ -static inline int *init_seen(struct ubi_device *ubi) +static inline unsigned long *init_seen(struct ubi_device *ubi) { - int *ret; + unsigned long *ret; if (!ubi_dbg_chk_fastmap(ubi)) return NULL; - ret = kcalloc(ubi->peb_count, sizeof(int), GFP_KERNEL); + ret = kcalloc(BITS_TO_LONGS(ubi->peb_count), sizeof(unsigned long), + GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); @@ -39,7 +41,7 @@ static inline int *init_seen(struct ubi_device *ubi) * free_seen - free the seen logic integer array. * @seen: integer array of @ubi->peb_count size */ -static inline void free_seen(int *seen) +static inline void free_seen(unsigned long *seen) { kfree(seen); } @@ -50,12 +52,12 @@ static inline void free_seen(int *seen) * @pnum: The PEB to be makred as seen * @seen: integer array of @ubi->peb_count size */ -static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen) +static inline void set_seen(struct ubi_device *ubi, int pnum, unsigned long *seen) { if (!ubi_dbg_chk_fastmap(ubi) || !seen) return; - seen[pnum] = 1; + set_bit(pnum, seen); } /** @@ -63,7 +65,7 @@ static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen) * @ubi: UBI device description object * @seen: integer array of @ubi->peb_count size */ -static int self_check_seen(struct ubi_device *ubi, int *seen) +static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) { int pnum, ret = 0; @@ -71,7 +73,7 @@ static int self_check_seen(struct ubi_device *ubi, int *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (!seen[pnum] && ubi->lookuptbl[pnum]) { + if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } @@ -578,7 +580,7 @@ static int count_fastmap_pebs(struct ubi_attach_info *ai) list_for_each_entry(aeb, &ai->free, u.list) n++; - ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) n++; @@ -850,27 +852,57 @@ fail: } /** + * find_fm_anchor - find the most recent Fastmap superblock (anchor) + * @ai: UBI attach info to be filled + */ +static int find_fm_anchor(struct ubi_attach_info *ai) +{ + int ret = -1; + struct ubi_ainf_peb *aeb; + unsigned long long max_sqnum = 0; + + list_for_each_entry(aeb, &ai->fastmap, u.list) { + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) { + max_sqnum = aeb->sqnum; + ret = aeb->pnum; + } + } + + return ret; +} + +/** * ubi_scan_fastmap - scan the fastmap. * @ubi: UBI device object * @ai: UBI attach info to be filled - * @fm_anchor: The fastmap starts at this PEB + * @scan_ai: UBI attach info from the first 64 PEBs, + * used to find the most recent Fastmap data structure * * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, * UBI_BAD_FASTMAP if one was found but is not usable. * < 0 indicates an internal error. */ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor) + struct ubi_attach_info *scan_ai) { struct ubi_fm_sb *fmsb, *fmsb2; struct ubi_vid_hdr *vh; struct ubi_ec_hdr *ech; struct ubi_fastmap_layout *fm; - int i, used_blocks, pnum, ret = 0; + struct ubi_ainf_peb *tmp_aeb, *aeb; + int i, used_blocks, pnum, fm_anchor, ret = 0; size_t fm_size; __be32 crc, tmp_crc; unsigned long long sqnum = 0; + fm_anchor = find_fm_anchor(scan_ai); + if (fm_anchor < 0) + return UBI_NO_FASTMAP; + + /* Move all (possible) fastmap blocks into our new attach structure. */ + list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list) + list_move_tail(&aeb->u.list, &ai->fastmap); + down_write(&ubi->fm_protect); memset(ubi->fm_buf, 0, ubi->fm_size); @@ -945,6 +977,13 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, goto free_hdr; } + if (i == 0 && pnum != fm_anchor) { + ubi_err(ubi, "Fastmap anchor PEB mismatch: PEB: %i vs. %i", + pnum, fm_anchor); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + ret = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); if (ret && ret != UBI_IO_BITFLIPS) { ubi_err(ubi, "unable to read fastmap block# %i EC (PEB: %i)", @@ -1102,7 +1141,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - int *seen_pebs = NULL; + unsigned long *seen_pebs = NULL; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index cb7c075f2144..1cb287ec32ad 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -99,9 +99,6 @@ static int gluebi_get_device(struct mtd_info *mtd) struct gluebi_device *gluebi; int ubi_mode = UBI_READONLY; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - if (mtd->flags & MTD_WRITEABLE) ubi_mode = UBI_READWRITE; @@ -129,7 +126,6 @@ static int gluebi_get_device(struct mtd_info *mtd) ubi_mode); if (IS_ERR(gluebi->desc)) { mutex_unlock(&devices_mutex); - module_put(THIS_MODULE); return PTR_ERR(gluebi->desc); } gluebi->refcnt += 1; @@ -153,7 +149,6 @@ static void gluebi_put_device(struct mtd_info *mtd) gluebi->refcnt -= 1; if (gluebi->refcnt == 0) ubi_close_volume(gluebi->desc); - module_put(THIS_MODULE); mutex_unlock(&devices_mutex); } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 10cf3b549959..ff8cafe1e5cd 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1019,7 +1019,7 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, p = (char *)vid_hdr - ubi->vid_hdr_shift; read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, - ubi->vid_hdr_alsize); + ubi->vid_hdr_shift + UBI_VID_HDR_SIZE); if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 61d4e99755a4..b616a115c9d3 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -703,6 +703,8 @@ struct ubi_ainf_volume { * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., * those belonging to "preserve"-compatible internal volumes) + * @fastmap: list of physical eraseblocks which relate to fastmap (e.g., + * eraseblocks of the current and not yet erased old fastmap blocks) * @corr_peb_count: count of PEBs in the @corr list * @empty_peb_count: count of PEBs which are presumably empty (contain only * 0xFF bytes) @@ -713,6 +715,8 @@ struct ubi_ainf_volume { * @vols_found: number of volumes found * @highest_vol_id: highest volume ID * @is_empty: flag indicating whether the MTD device is empty or not + * @force_full_scan: flag indicating whether we need to do a full scan and drop + all existing Fastmap data structures * @min_ec: lowest erase counter value * @max_ec: highest erase counter value * @max_sqnum: highest sequence number value @@ -731,6 +735,7 @@ struct ubi_attach_info { struct list_head free; struct list_head erase; struct list_head alien; + struct list_head fastmap; int corr_peb_count; int empty_peb_count; int alien_peb_count; @@ -739,6 +744,7 @@ struct ubi_attach_info { int vols_found; int highest_vol_id; int is_empty; + int force_full_scan; int min_ec; int max_ec; unsigned long long max_sqnum; @@ -911,7 +917,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, size_t ubi_calc_fm_size(struct ubi_device *ubi); int ubi_update_fastmap(struct ubi_device *ubi); int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor); + struct ubi_attach_info *scan_ai); #else static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } #endif @@ -1105,4 +1111,42 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx) return idx; } +/** + * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume. + * @vol_id: volume ID + */ +static inline bool ubi_is_fm_vol(int vol_id) +{ + switch (vol_id) { + case UBI_FM_SB_VOLUME_ID: + case UBI_FM_DATA_VOLUME_ID: + return true; + } + + return false; +} + +/** + * ubi_find_fm_block - check whether a PEB is part of the current Fastmap. + * @ubi: UBI device description object + * @pnum: physical eraseblock to look for + * + * This function returns a wear leveling object if @pnum relates to the current + * fastmap, @NULL otherwise. + */ +static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi, + int pnum) +{ + int i; + + if (ubi->fm) { + for (i = 0; i < ubi->fm->used_blocks; i++) { + if (ubi->fm->e[i]->pnum == pnum) + return ubi->fm->e[i]; + } + } + + return NULL; +} + #endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 10059dfdc1b6..0138f526474a 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -488,13 +488,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } - /* Change volume table record */ - vtbl_rec = ubi->vtbl[vol_id]; - vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); - err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); - if (err) - goto out_acc; - if (pebs < 0) { for (i = 0; i < -pebs; i++) { err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); @@ -512,6 +505,24 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } + /* + * When we shrink a volume we have to flush all pending (erase) work. + * Otherwise it can happen that upon next attach UBI finds a LEB with + * lnum > highest_lnum and refuses to attach. + */ + if (pebs < 0) { + err = ubi_wl_flush(ubi, vol_id, UBI_ALL); + if (err) + goto out_acc; + } + + /* Change volume table record */ + vtbl_rec = ubi->vtbl[vol_id]; + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + vol->reserved_pebs = reserved_pebs; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 959c7b12e0b1..f4533266d7b2 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1598,19 +1598,44 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) } } - dbg_wl("found %i PEBs", found_pebs); + list_for_each_entry(aeb, &ai->fastmap, u.list) { + cond_resched(); + + e = ubi_find_fm_block(ubi, aeb->pnum); - if (ubi->fm) { - ubi_assert(ubi->good_peb_count == - found_pebs + ubi->fm->used_blocks); + if (e) { + ubi_assert(!ubi->lookuptbl[e->pnum]); + ubi->lookuptbl[e->pnum] = e; + } else { + /* + * Usually old Fastmap PEBs are scheduled for erasure + * and we don't have to care about them but if we face + * an power cut before scheduling them we need to + * take care of them here. + */ + if (ubi->lookuptbl[aeb->pnum]) + continue; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; - for (i = 0; i < ubi->fm->used_blocks; i++) { - e = ubi->fm->e[i]; + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + wl_entry_destroy(ubi, e); + goto out_free; + } } + + found_pebs++; } - else - ubi_assert(ubi->good_peb_count == found_pebs); + + dbg_wl("found %i PEBs", found_pebs); + + ubi_assert(ubi->good_peb_count == found_pebs); reserved_pebs = WL_RESERVED_PEBS; ubi_fastmap_init(ubi, &reserved_pebs); diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 547098086773..f81df91a9ce1 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -52,5 +52,5 @@ config CAIF_VIRTIO The caif driver for CAIF over Virtio. if CAIF_VIRTIO -source "drivers/vhost/Kconfig" +source "drivers/vhost/Kconfig.vringh" endif diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index 4721948a92f6..3a529fbe539f 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -185,8 +185,8 @@ static ssize_t print_frame(char *buf, size_t size, char *frm, /* Fast forward. */ i = count - cut; len += snprintf((buf + len), (size - len), - "--- %u bytes skipped ---\n", - (int)(count - (cut * 2))); + "--- %zu bytes skipped ---\n", + count - (cut * 2)); } if ((!(i % 10)) && i) { diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 21f1068b0804..77ffc4312808 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -233,8 +233,7 @@ static int b53_mmap_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; - if (pdata) - dev->pdata = pdata; + dev->pdata = pdata; platform_set_drvdata(pdev, dev); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cd1d630ae3a9..b2b838724a9b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1622,7 +1622,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) "switch_0", priv); if (ret < 0) { pr_err("failed to request switch_0 IRQ\n"); - goto out_unmap; + goto out_mdio; } ret = request_irq(priv->irq1, bcm_sf2_switch_1_isr, 0, @@ -1679,6 +1679,8 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) out_free_irq0: free_irq(priv->irq0, priv); +out_mdio: + bcm_sf2_mdio_unregister(priv); out_unmap: base = &priv->core; for (i = 0; i < BCM_SF2_REGS_NUM; i++) { @@ -1686,7 +1688,6 @@ out_unmap: iounmap(*base); base++; } - bcm_sf2_mdio_unregister(priv); return ret; } diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 5698f5354c0b..39ca9350d1b2 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -910,7 +910,8 @@ static int ax_probe(struct platform_device *pdev) iounmap(ax->map2); exit_mem2: - release_mem_region(mem2->start, mem2_size); + if (mem2) + release_mem_region(mem2->start, mem2_size); exit_mem1: iounmap(ei_local->mem); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 49025e99fb0e..bda31f308cc2 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -815,6 +815,7 @@ static int init_phy(struct net_device *dev) phydev = of_phy_connect(dev, phynode, &altera_tse_adjust_link, 0, priv->phy_iface); } + of_node_put(phynode); if (!phydev) { netdev_err(dev, "Could not find the PHY\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index ebf9224b2d31..a9b2709567ec 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -154,7 +154,7 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) goto err_rx_ring; for (i = 0, channel = channel_mem; i < count; i++, channel++) { - snprintf(channel->name, sizeof(channel->name), "channel-%d", i); + snprintf(channel->name, sizeof(channel->name), "channel-%u", i); channel->pdata = pdata; channel->queue_index = i; channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE + diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 7714b7d4026a..37a0f463b8de 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -772,6 +772,7 @@ int xgene_enet_phy_connect(struct net_device *ndev) phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link, 0, pdata->phy_mode); + of_node_put(np); if (!phy_dev) { netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 586bedac457d..4bff0f3040df 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -749,14 +749,16 @@ int arc_emac_probe(struct net_device *ndev, int interface) err = of_address_to_resource(dev->of_node, 0, &res_regs); if (err) { dev_err(dev, "failed to retrieve registers base from device tree\n"); - return -ENODEV; + err = -ENODEV; + goto out_put_node; } /* Get IRQ from device tree */ irq = irq_of_parse_and_map(dev->of_node, 0); if (!irq) { dev_err(dev, "failed to retrieve <irq> value from device tree\n"); - return -ENODEV; + err = -ENODEV; + goto out_put_node; } ndev->netdev_ops = &arc_emac_netdev_ops; @@ -778,7 +780,7 @@ int arc_emac_probe(struct net_device *ndev, int interface) err = clk_prepare_enable(priv->clk); if (err) { dev_err(dev, "failed to enable clock\n"); - return err; + goto out_put_node; } clock_frequency = clk_get_rate(priv->clk); @@ -787,7 +789,8 @@ int arc_emac_probe(struct net_device *ndev, int interface) if (of_property_read_u32(dev->of_node, "clock-frequency", &clock_frequency)) { dev_err(dev, "failed to retrieve <clock-frequency> from device tree\n"); - return -EINVAL; + err = -EINVAL; + goto out_put_node; } } @@ -867,6 +870,7 @@ int arc_emac_probe(struct net_device *ndev, int interface) goto out_netif_api; } + of_node_put(phy_node); return 0; out_netif_api: @@ -877,6 +881,9 @@ out_mdio: out_clken: if (priv->clk) clk_disable_unprepare(priv->clk); +out_put_node: + of_node_put(phy_node); + return err; } EXPORT_SYMBOL_GPL(arc_emac_probe); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index e708e360a9e3..6453148d066a 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1251,7 +1251,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct alx_priv *alx; struct alx_hw *hw; bool phy_configured; - int bars, err; + int err; err = pci_enable_device_mem(pdev); if (err) @@ -1271,11 +1271,10 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - bars = pci_select_bars(pdev, IORESOURCE_MEM); - err = pci_request_selected_regions(pdev, bars, alx_drv_name); + err = pci_request_mem_regions(pdev, alx_drv_name); if (err) { dev_err(&pdev->dev, - "pci_request_selected_regions failed(bars:%d)\n", bars); + "pci_request_mem_regions failed\n"); goto out_pci_disable; } @@ -1401,7 +1400,7 @@ out_unmap: out_free_netdev: free_netdev(netdev); out_pci_release: - pci_release_selected_regions(pdev, bars); + pci_release_mem_regions(pdev); out_pci_disable: pci_disable_device(pdev); return err; @@ -1420,8 +1419,7 @@ static void alx_remove(struct pci_dev *pdev) unregister_netdev(alx->dev); iounmap(hw->hw_addr); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index 0d4ea92a0d37..b047fd607b83 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -1504,6 +1504,7 @@ static int nb8800_probe(struct platform_device *pdev) err_free_dma: nb8800_dma_free(dev); err_free_bus: + of_node_put(priv->phy_node); mdiobus_unregister(bus); err_disable_clk: clk_disable_unprepare(priv->clk); @@ -1519,6 +1520,7 @@ static int nb8800_remove(struct platform_device *pdev) struct nb8800_priv *priv = netdev_priv(ndev); unregister_netdev(ndev); + of_node_put(priv->phy_node); mdiobus_unregister(priv->mii_bus); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 87c6b5bdd616..6c8bc5fadac7 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1859,7 +1859,7 @@ static int bcm_enet_probe(struct platform_device *pdev) } else { /* run platform code to initialize PHY device */ - if (pd->mii_config && + if (pd && pd->mii_config && pd->mii_config(dev, 1, bcm_enet_mdio_read_mii, bcm_enet_mdio_write_mii)) { dev_err(&pdev->dev, "unable to configure mdio bus\n"); diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 8fc246ea1fb8..05c1c1dd7751 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -312,7 +312,8 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, struct bnad_debug_info *regrd_debug = file->private_data; struct bnad *bnad = (struct bnad *)regrd_debug->i_private; struct bfa_ioc *ioc = &bnad->bna.ioceth.ioc; - int addr, len, rc, i; + int rc, i; + u32 addr, len; u32 *regbuf; void __iomem *rb, *reg_addr; unsigned long flags; @@ -372,7 +373,8 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf, struct bnad_debug_info *debug = file->private_data; struct bnad *bnad = (struct bnad *)debug->i_private; struct bfa_ioc *ioc = &bnad->bna.ioceth.ioc; - int addr, val, rc; + int rc; + u32 addr, val; void __iomem *reg_addr; unsigned long flags; void *kern_buf; diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index e8bc15bcde70..4ab404f45b21 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1513,6 +1513,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) return 0; err: + of_node_put(p->phy_np); free_netdev(netdev); return result; } @@ -1520,8 +1521,10 @@ err: static int octeon_mgmt_remove(struct platform_device *pdev) { struct net_device *netdev = platform_get_drvdata(pdev); + struct octeon_mgmt *p = netdev_priv(netdev); unregister_netdev(netdev); + of_node_put(p->phy_np); free_netdev(netdev); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index bad253beb8c8..ad3552df0545 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1192,7 +1192,7 @@ out_free: dev_kfree_skb_any(skb); /* Discard the packet if the length is greater than mtu */ max_pkt_len = ETH_HLEN + dev->mtu; - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tagged(skb)) max_pkt_len += VLAN_HLEN; if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) goto out_free; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 4705e2dea423..e0ebe1378cb2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -104,6 +104,8 @@ enum { enum CPL_error { CPL_ERR_NONE = 0, + CPL_ERR_TCAM_PARITY = 1, + CPL_ERR_TCAM_MISS = 2, CPL_ERR_TCAM_FULL = 3, CPL_ERR_BAD_LENGTH = 15, CPL_ERR_BAD_ROUTE = 18, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 1bb57d3fbbe8..c8fd4f8fe1fa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1188,7 +1188,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) /* Discard the packet if the length is greater than mtu */ max_pkt_len = ETH_HLEN + dev->mtu; - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tagged(skb)) max_pkt_len += VLAN_HLEN; if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) goto out_free; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index f15560a06718..48f82ab6c25b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1566,7 +1566,7 @@ static int enic_request_intr(struct enic *enic) intr = enic_msix_rq_intr(enic, i); snprintf(enic->msix[intr].devname, sizeof(enic->msix[intr].devname), - "%.11s-rx-%d", netdev->name, i); + "%.11s-rx-%u", netdev->name, i); enic->msix[intr].isr = enic_isr_msix; enic->msix[intr].devid = &enic->napi[i]; } @@ -1577,7 +1577,7 @@ static int enic_request_intr(struct enic *enic) intr = enic_msix_wq_intr(enic, i); snprintf(enic->msix[intr].devname, sizeof(enic->msix[intr].devname), - "%.11s-tx-%d", netdev->name, i); + "%.11s-tx-%u", netdev->name, i); enic->msix[intr].isr = enic_isr_msix; enic->msix[intr].devid = &enic->napi[wq]; } diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index cbe84972ff7a..f0e9e2ef62a0 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1319,7 +1319,7 @@ de4x5_open(struct net_device *dev) if (request_irq(dev->irq, de4x5_interrupt, IRQF_SHARED, lp->adapter_name, dev)) { - printk("de4x5_open(): Requested IRQ%d is busy - attemping FAST/SHARE...", dev->irq); + printk("de4x5_open(): Requested IRQ%d is busy - attempting FAST/SHARE...", dev->irq); if (request_irq(dev->irq, de4x5_interrupt, IRQF_SHARED, lp->adapter_name, dev)) { printk("\n Cannot get IRQ- reconfigure your hardware.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 3fb87e233c49..5c8afe1a5ccb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -795,6 +795,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n", mac_cb->mac_id, np->name); } + of_node_put(np); return 0; } @@ -812,10 +813,12 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n", mac_cb->mac_id, np->name); } + of_node_put(np); - syscon = syscon_node_to_regmap( - of_parse_phandle(to_of_node(mac_cb->fw_port), - "serdes-syscon", 0)); + np = of_parse_phandle(to_of_node(mac_cb->fw_port), + "serdes-syscon", 0); + syscon = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR_OR_NULL(syscon)) { dev_err(mac_cb->dev, "serdes-syscon is needed!\n"); return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 2ef4277d00b3..afb5daa3721d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -51,7 +51,7 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) const char *mode_str; struct regmap *syscon; struct resource *res; - struct device_node *np = dsaf_dev->dev->of_node; + struct device_node *np = dsaf_dev->dev->of_node, *np_temp; struct platform_device *pdev = to_platform_device(dsaf_dev->dev); if (dev_of_node(dsaf_dev->dev)) { @@ -102,8 +102,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->dsaf_tc_mode = HRD_DSAF_4TC_MODE; if (dev_of_node(dsaf_dev->dev)) { - syscon = syscon_node_to_regmap( - of_parse_phandle(np, "subctrl-syscon", 0)); + np_temp = of_parse_phandle(np, "subctrl-syscon", 0); + syscon = syscon_node_to_regmap(np_temp); + of_node_put(np_temp); if (IS_ERR_OR_NULL(syscon)) { res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 41f32c0b341e..02f443958f31 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7330,8 +7330,7 @@ err_flashmap: err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -7398,8 +7397,7 @@ static void e1000_remove(struct pci_dev *pdev) if ((adapter->hw.flash_address) && (adapter->hw.mac.type < e1000_pch_spt)) iounmap(adapter->hw.flash_address); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); free_netdev(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index b8245c734c96..774a5654bf42 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1963,10 +1963,7 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_dma; } - err = pci_request_selected_regions(pdev, - pci_select_bars(pdev, - IORESOURCE_MEM), - fm10k_driver_name); + err = pci_request_mem_regions(pdev, fm10k_driver_name); if (err) { dev_err(&pdev->dev, "pci_request_selected_regions failed: %d\n", err); @@ -2070,8 +2067,7 @@ err_sw_init: err_ioremap: free_netdev(netdev); err_alloc_netdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -2119,8 +2115,7 @@ static void fm10k_remove(struct pci_dev *pdev) free_netdev(netdev); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 339d99be4702..81c99e1be708 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10710,8 +10710,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* set up pci connections */ - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), i40e_driver_name); + err = pci_request_mem_regions(pdev, i40e_driver_name); if (err) { dev_info(&pdev->dev, "pci_request_selected_regions failed %d\n", err); @@ -11208,8 +11207,7 @@ err_ioremap: kfree(pf); err_pf_alloc: pci_disable_pcie_error_reporting(pdev); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -11320,8 +11318,7 @@ static void i40e_remove(struct pci_dev *pdev) iounmap(hw->hw_addr); kfree(pf); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9bcba42abb91..942a89fb0090 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2324,9 +2324,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); + err = pci_request_mem_regions(pdev, igb_driver_name); if (err) goto err_pci_reg; @@ -2750,8 +2748,7 @@ err_sw_init: err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -2916,8 +2913,7 @@ static void igb_remove(struct pci_dev *pdev) pci_iounmap(pdev, adapter->io_addr); if (hw->flash_address) iounmap(hw->flash_address); - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); kfree(adapter->shadow_vfta); free_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7871f538f0ad..5418c69a7463 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9353,8 +9353,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_using_dac = 0; } - err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), ixgbe_driver_name); + err = pci_request_mem_regions(pdev, ixgbe_driver_name); if (err) { dev_err(&pdev->dev, "pci_request_selected_regions failed 0x%x\n", err); @@ -9740,8 +9739,7 @@ err_ioremap: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: if (!adapter || disable_dev) @@ -9808,8 +9806,7 @@ static void ixgbe_remove(struct pci_dev *pdev) #endif iounmap(adapter->io_addr); - pci_release_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM)); + pci_release_mem_regions(pdev); e_dev_info("complete\n"); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f92018b13d28..d41c28d00b57 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4118,6 +4118,7 @@ static int mvneta_probe(struct platform_device *pdev) pp->bm_priv = NULL; } } + of_node_put(bm_node); err = mvneta_init(&pdev->dev, pp); if (err < 0) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 0b047178cda1..60227a3452a4 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6234,6 +6234,7 @@ err_free_stats: err_free_irq: irq_dispose_mapping(port->irq); err_free_netdev: + of_node_put(phy_node); free_netdev(dev); return err; } @@ -6244,6 +6245,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port) int i; unregister_netdev(port->dev); + of_node_put(port->phy_node); free_percpu(port->pcpu); free_percpu(port->stats); for (i = 0; i < txq_number; i++) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index aeeb2e79a91a..5d5000c8edf1 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1506,6 +1506,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) } of_property_read_u32(np, "reg", &pep->phy_addr); pep->phy_intf = of_get_phy_mode(pdev->dev.of_node); + of_node_put(np); } /* Hardware supports only 3 ports */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f4497cf4d06d..d728704d0c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c +#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 @@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; if (field32 & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); + if (field32 & (1 << 17)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -2457,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev) MLX4_CMD_NATIVE); } +int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, + const u32 offset[], + u32 value[], size_t array_len, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + size_t i; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + outbox = mailbox->buf; + + ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, + MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + for (i = 0; i < array_len; i++) { + if (offset[i] > MLX4_MAILBOX_SIZE) { + ret = -EINVAL; + goto out; + } + + MLX4_GET(value[i], outbox, offset[i]); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} +EXPORT_SYMBOL(mlx4_query_diag_counters); + int mlx4_get_phys_port_id(struct mlx4_dev *dev) { u8 port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 04bc522605a0..c07f4d01b70e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } -static int get_pas_size(void *srqc) +static int get_pas_size(struct mlx5_srq_attr *in) { - u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; - u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); - u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); - u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; u32 po_quanta = 1 << (log_page_size - 6); u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; @@ -78,57 +78,58 @@ static int get_pas_size(void *srqc) return rq_num_pas * sizeof(u64); } -static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +static void set_wq(void *wq, struct mlx5_srq_attr *in) { - void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - if (srqc_to_rmpc) { - switch (MLX5_GET(srqc, srqc, state)) { - case MLX5_SRQC_STATE_GOOD: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - break; - case MLX5_SRQC_STATE_ERROR: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); - break; - default: - pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, - __LINE__, MLX5_GET(srqc, srqc, state)); - MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); - } - - MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); - MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); - MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); - MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); - MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); - MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); - } else { - switch (MLX5_GET(rmpc, rmpc, state)) { - case MLX5_RMPC_STATE_RDY: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); - break; - case MLX5_RMPC_STATE_ERR: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); - break; - default: - pr_warn("%s: %d: Unknown rmp state = 0x%x\n", - __func__, __LINE__, - MLX5_GET(rmpc, rmpc, state)); - MLX5_SET(srqc, srqc, state, - MLX5_GET(rmpc, rmpc, state)); - } - - MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); - MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); - MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); - MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); - MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); - MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); - MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); - MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); - } + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); } struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) @@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) EXPORT_SYMBOL(mlx5_core_get_srq); static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) + struct mlx5_srq_attr *in) { - struct mlx5_create_srq_mbox_out out; + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; int err; - memset(&out, 0, sizeof(out)); + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); - err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), - sizeof(out)); + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); - srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); return err; } @@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)(&out), sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + /* arm_srq structs missing using identical xrc ones */ + u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); + MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), - sizeof(in), (u32 *)(&out), - sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { - struct mlx5_query_srq_mbox_in in; + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; - memset(&in, 0, sizeof(in)); + srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out)); + if (!srq_out) + return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)out, sizeof(*out)); + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; } static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int srq_inlen) + struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; void *create_in; - void *srqc; void *xrc_srqc; void *pas; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) @@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; u32 *xrcsrq_out; - void *srqc; void *xrc_srqc; int err; @@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, xrc_srq_context_entry); - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); - memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(xrcsrq_out); @@ -326,26 +353,27 @@ out: } static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int srq_inlen) + struct mlx5_srq_attr *in) { void *create_in; void *rmpc; - void *srqc; + void *wq; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) return -ENOMEM; rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - rmpc_srqc_reformat(srqc, rmpc, true); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); @@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, } static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 *rmp_out; void *rmpc; - void *srqc; int err; rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); @@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - rmpc_srqc_reformat(srqc, rmpc, false); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(rmp_out); @@ -416,15 +444,14 @@ out: static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int inlen, int is_xrc) + struct mlx5_srq_attr *in) { if (!dev->issi) - return create_srq_cmd(dev, srq, in, inlen); + return create_srq_cmd(dev, srq, in); else if (srq->common.res == MLX5_RES_XSRQ) - return create_xrc_srq_cmd(dev, srq, in, inlen); + return create_xrc_srq_cmd(dev, srq, in); else - return create_rmp_cmd(dev, srq, in, inlen); + return create_rmp_cmd(dev, srq, in); } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen, - int is_xrc) + struct mlx5_srq_attr *in) { int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + if (in->type == IB_SRQT_XRC) + srq->common.res = MLX5_RES_XSRQ; + else + srq->common.res = MLX5_RES_SRQ; - err = create_srq_split(dev, srq, in, inlen, is_xrc); + err = create_srq_split(dev, srq, in); if (err) return err; @@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { if (!dev->issi) return query_srq_cmd(dev, srq, out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 03a5093ffeb7..28274a6fbafe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) return err; } +EXPORT_SYMBOL(mlx5_core_create_rq); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) { @@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rq); int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) { @@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) @@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rqt); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 2874dffe77de..eaa37c079a7c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7412,7 +7412,7 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!ring_data->lro) || - (ring_data->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) && + (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG))) && (dev->features & NETIF_F_RXCSUM)) { l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b26fe267a150..0e4f4a9306b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -509,6 +509,7 @@ int qed_resc_alloc(struct qed_dev *cdev) DP_ERR(p_hwfn, "Cannot allocate 0x%x EQ elements. The maximum of a u16 chain is 0x%x\n", n_eqes, 0xFFFF); + rc = -EINVAL; goto alloc_err; } @@ -888,7 +889,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, if (hw_mode & (1 << MODE_MF_SI)) { u8 pf_id = 0; - u32 val; + u32 val = 0; if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) { if (p_hwfn->rel_pf_id == pf_id) { @@ -2539,7 +2540,7 @@ int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate) rc = __qed_configure_vport_wfq(p_hwfn, p_ptt, vp_id, rate); - if (!rc) { + if (rc) { qed_ptt_release(p_hwfn, p_ptt); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index a12c6caa6c66..401e738543b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -590,7 +590,7 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, u16 cqe_pbl_size, void __iomem **pp_prod) { struct qed_hw_cid_data *p_rx_cid; - u64 init_prod_val = 0; + u32 init_prod_val = 0; u16 abs_l2_queue = 0; u8 abs_stats_id = 0; int rc; @@ -618,7 +618,7 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, MSTORM_ETH_PF_PRODS_OFFSET(abs_l2_queue); /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */ - __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u64), + __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), (u32 *)(&init_prod_val)); /* Allocate a CID for the queue */ @@ -1664,6 +1664,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, info->num_tc = 1; if (IS_PF(cdev)) { + int max_vf_vlan_filters = 0; + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { for_each_hwfn(cdev, i) info->num_queues += @@ -1676,7 +1678,12 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, info->num_queues = cdev->num_hwfns; } - info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN); + if (IS_QED_SRIOV(cdev)) + max_vf_vlan_filters = cdev->p_iov_info->total_vfs * + QED_ETH_VF_NUM_VLAN_FILTERS; + info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN) - + max_vf_vlan_filters; + ether_addr_copy(info->port_mac, cdev->hwfns[0].hw_info.hw_mac_addr); } else { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 1f13abb5c316..c7dc34bfdd0a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -659,8 +659,13 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, struct qed_sb_cnt_info sb_cnt_info; int rc; int i; - memset(&cdev->int_params, 0, sizeof(struct qed_int_params)); + if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { + DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n"); + return -EINVAL; + } + + memset(&cdev->int_params, 0, sizeof(struct qed_int_params)); cdev->int_params.in.int_mode = int_mode; for_each_hwfn(cdev, i) { memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 4d161c751c12..15399da268d9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1404,7 +1404,7 @@ static int __qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, params.anti_spoofing_en = val; rc = qed_sp_vport_update(p_hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); - if (rc) { + if (!rc) { p_vf->spoof_chk = val; p_vf->req_spoofchk_val = p_vf->spoof_chk; DP_VERBOSE(p_hwfn, QED_MSG_IOV, diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 9819230947bf..9b780b31b15c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -388,7 +388,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, /* Learn the address of the producer from the response */ if (pp_prod) { - u64 init_prod_val = 0; + u32 init_prod_val = 0; *pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset; DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -396,7 +396,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, rx_qid, *pp_prod, resp->offset); /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */ - __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u64), + __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), (u32 *)&init_prod_val); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 91e7bb0b85c8..e4bd02e46e57 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2064,10 +2064,13 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) } /* Remove vlan */ - rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_DEL, vid); - if (rc) { - DP_ERR(edev, "Failed to remove VLAN %d\n", vid); - return -EINVAL; + if (vlan->configured) { + rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_DEL, + vid); + if (rc) { + DP_ERR(edev, "Failed to remove VLAN %d\n", vid); + return -EINVAL; + } } qede_del_vlan_from_list(edev, vlan); @@ -3268,6 +3271,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) start.vport_id = 0; start.drop_ttl0 = true; start.remove_inner_vlan = vlan_removal_en; + start.clear_stats = clear_stats; rc = edev->ops->vport_start(cdev, &start); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index 9777e5713525..f4aa6331b367 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -45,7 +45,6 @@ struct qlcnic_dcb { static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) { kfree(dcb); - dcb = NULL; } static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index fd5d1c93b55b..fd4a8e473f11 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1892,7 +1892,6 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev, skb->len += length; skb->data_len += length; skb->truesize += length; - length -= length; ql_update_mac_hdr_len(qdev, ib_mac_rsp, lbq_desc->p.pg_chunk.va, &hlen); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index ef668d300800..da4c2d8a4173 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1667,6 +1667,10 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) int i; u8 tmp8; + napi_disable(&tp->napi); + netif_stop_queue(dev); + synchronize_sched(); + netdev_dbg(dev, "Transmit timeout, status %02x %04x %04x media %02x\n", RTL_R8(ChipCmd), RTL_R16(IntrStatus), RTL_R16(IntrMask), RTL_R8(MediaStatus)); @@ -1696,10 +1700,10 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_irq(&tp->lock); /* ...and finally, reset everything */ - if (netif_running(dev)) { - rtl8139_hw_start (dev); - netif_wake_queue (dev); - } + napi_enable(&tp->napi); + rtl8139_hw_start(dev); + netif_wake_queue(dev); + spin_unlock_bh(&tp->rx_lock); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 0e62d74b09b3..e55638c7505a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1749,13 +1749,21 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp) static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = &tp->pci_dev->dev; + + pm_runtime_get_noresume(d); rtl_lock_work(tp); wol->supported = WAKE_ANY; - wol->wolopts = __rtl8169_get_wol(tp); + if (pm_runtime_active(d)) + wol->wolopts = __rtl8169_get_wol(tp); + else + wol->wolopts = tp->saved_wolopts; rtl_unlock_work(tp); + + pm_runtime_put_noidle(d); } static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) @@ -1845,6 +1853,9 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = &tp->pci_dev->dev; + + pm_runtime_get_noresume(d); rtl_lock_work(tp); @@ -1852,12 +1863,17 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) tp->features |= RTL_FEATURE_WOL; else tp->features &= ~RTL_FEATURE_WOL; - __rtl8169_set_wol(tp, wol->wolopts); + if (pm_runtime_active(d)) + __rtl8169_set_wol(tp, wol->wolopts); + else + tp->saved_wolopts = wol->wolopts; rtl_unlock_work(tp); device_set_wakeup_enable(&tp->pci_dev->dev, wol->wolopts); + pm_runtime_put_noidle(d); + return 0; } @@ -2292,11 +2308,17 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = &tp->pci_dev->dev; struct rtl8169_counters *counters = tp->counters; ASSERT_RTNL(); - rtl8169_update_counters(dev); + pm_runtime_get_noresume(d); + + if (pm_runtime_active(d)) + rtl8169_update_counters(dev); + + pm_runtime_put_noidle(d); data[0] = le64_to_cpu(counters->tx_packets); data[1] = le64_to_cpu(counters->rx_packets); @@ -4458,6 +4480,7 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) static int rtl_set_mac_address(struct net_device *dev, void *p) { struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = &tp->pci_dev->dev; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) @@ -4465,7 +4488,12 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - rtl_rar_set(tp, dev->dev_addr); + pm_runtime_get_noresume(d); + + if (pm_runtime_active(d)) + rtl_rar_set(tp, dev->dev_addr); + + pm_runtime_put_noidle(d); return 0; } @@ -7868,6 +7896,7 @@ static int rtl8169_runtime_resume(struct device *device) struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); struct rtl8169_private *tp = netdev_priv(dev); + rtl_rar_set(tp, dev->dev_addr); if (!tp->TxDescArray) return 0; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8377d0220fa8..1e1cc0fad17f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1005,6 +1005,7 @@ static int ravb_phy_init(struct net_device *ndev) } phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0, priv->phy_interface); + of_node_put(pn); if (!phydev) { netdev_err(ndev, "failed to connect PHY\n"); return -ENOENT; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7bd910ce8b34..799d58d86e6d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1780,6 +1780,7 @@ static int sh_eth_phy_init(struct net_device *ndev) sh_eth_adjust_link, 0, mdp->phy_interface); + of_node_put(pn); if (!phydev) phydev = ERR_PTR(-ENOENT); } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index edd20c3b2b3d..bec6963ac71e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -135,7 +135,9 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * np_splitter = of_parse_phandle(np, "altr,emac-splitter", 0); if (np_splitter) { - if (of_address_to_resource(np_splitter, 0, &res_splitter)) { + ret = of_address_to_resource(np_splitter, 0, &res_splitter); + of_node_put(np_splitter); + if (ret) { dev_info(dev, "Missing emac splitter address\n"); return -EINVAL; } @@ -159,14 +161,17 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * dev_err(dev, "%s: ERROR: missing emac splitter address\n", __func__); - return -EINVAL; + ret = -EINVAL; + goto err_node_put; } dwmac->splitter_base = devm_ioremap_resource(dev, &res_splitter); - if (IS_ERR(dwmac->splitter_base)) - return PTR_ERR(dwmac->splitter_base); + if (IS_ERR(dwmac->splitter_base)) { + ret = PTR_ERR(dwmac->splitter_base); + goto err_node_put; + } } index = of_property_match_string(np_sgmii_adapter, "reg-names", @@ -178,14 +183,17 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * dev_err(dev, "%s: ERROR: failed mapping adapter\n", __func__); - return -EINVAL; + ret = -EINVAL; + goto err_node_put; } dwmac->pcs.sgmii_adapter_base = devm_ioremap_resource(dev, &res_sgmii_adapter); - if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) - return PTR_ERR(dwmac->pcs.sgmii_adapter_base); + if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) { + ret = PTR_ERR(dwmac->pcs.sgmii_adapter_base); + goto err_node_put; + } } index = of_property_match_string(np_sgmii_adapter, "reg-names", @@ -197,22 +205,30 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * dev_err(dev, "%s: ERROR: failed mapping tse control port\n", __func__); - return -EINVAL; + ret = -EINVAL; + goto err_node_put; } dwmac->pcs.tse_pcs_base = devm_ioremap_resource(dev, &res_tse_pcs); - if (IS_ERR(dwmac->pcs.tse_pcs_base)) - return PTR_ERR(dwmac->pcs.tse_pcs_base); + if (IS_ERR(dwmac->pcs.tse_pcs_base)) { + ret = PTR_ERR(dwmac->pcs.tse_pcs_base); + goto err_node_put; + } } } dwmac->reg_offset = reg_offset; dwmac->reg_shift = reg_shift; dwmac->sys_mgr_base_addr = sys_mgr_base_addr; dwmac->dev = dev; + of_node_put(np_sgmii_adapter); return 0; + +err_node_put: + of_node_put(np_sgmii_adapter); + return ret; } static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c23ccabc2d8a..4c8c60af7985 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3397,6 +3397,7 @@ int stmmac_dvr_remove(struct device *dev) stmmac_set_mac(priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); + of_node_put(priv->plat->phy_node); if (priv->stmmac_rst) reset_control_assert(priv->stmmac_rst); clk_disable_unprepare(priv->pclk); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f7dfc0ae8e9c..756bb548e81a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -113,8 +113,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) return NULL; axi = kzalloc(sizeof(*axi), GFP_KERNEL); - if (!axi) + if (!axi) { + of_node_put(np); return ERR_PTR(-ENOMEM); + } axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); @@ -127,6 +129,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt); of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt); of_property_read_u32_array(np, "snps,blen", axi->axi_blen, AXI_BLEN); + of_node_put(np); return axi; } @@ -302,7 +305,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), GFP_KERNEL); if (!dma_cfg) { - of_node_put(np); + of_node_put(plat->phy_node); return ERR_PTR(-ENOMEM); } plat->dma_cfg = dma_cfg; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1a93a1f28433..c51f34693eae 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2564,19 +2564,17 @@ clean_ndev_ret: return ret; } -static int cpsw_remove_child_device(struct device *dev, void *c) -{ - struct platform_device *pdev = to_platform_device(dev); - - of_device_unregister(pdev); - - return 0; -} - static int cpsw_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct cpsw_priv *priv = netdev_priv(ndev); + int ret; + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&pdev->dev); + return ret; + } if (priv->data.dual_emac) unregister_netdev(cpsw_get_slave_ndev(priv, 1)); @@ -2584,8 +2582,9 @@ static int cpsw_remove(struct platform_device *pdev) cpsw_ale_destroy(priv->ale); cpdma_ctlr_destroy(priv->dma); + of_platform_depopulate(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device); if (priv->data.dual_emac) free_netdev(cpsw_get_slave_ndev(priv, 1)); free_netdev(ndev); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 73638f7a55d4..19e5f32a8a64 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -357,13 +357,11 @@ EXPORT_SYMBOL_GPL(cpdma_ctlr_stop); int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr) { - unsigned long flags; int ret = 0, i; if (!ctlr) return -EINVAL; - spin_lock_irqsave(&ctlr->lock, flags); if (ctlr->state != CPDMA_STATE_IDLE) cpdma_ctlr_stop(ctlr); @@ -371,7 +369,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr) cpdma_chan_destroy(ctlr->channels[i]); cpdma_desc_pool_destroy(ctlr->pool); - spin_unlock_irqrestore(&ctlr->lock, flags); return ret; } EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 6e305a82ed43..727a79f3c7dd 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1964,6 +1964,7 @@ static int davinci_emac_remove(struct platform_device *pdev) cpdma_ctlr_destroy(priv->dma); unregister_netdev(ndev); + of_node_put(priv->phy_node); pm_runtime_disable(&pdev->dev); free_netdev(ndev); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 2d0beb1b801c..d13e6e15d7b5 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -344,7 +344,6 @@ static void free_rxsa(struct rcu_head *head) crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); - macsec_rxsc_put(sa->sc); kfree(sa); } @@ -863,6 +862,7 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err) struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; + struct macsec_rx_sc *rx_sc = rx_sa->sc; int len, ret; u32 pn; @@ -891,6 +891,7 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err) out: macsec_rxsa_put(rx_sa); + macsec_rxsc_put(rx_sc); dev_put(dev); } @@ -1106,6 +1107,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); + sc = sc ? macsec_rxsc_get(sc) : NULL; if (sc) { secy = &macsec->secy; @@ -1180,8 +1182,10 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) if (IS_ERR(skb)) { /* the decrypt callback needs the reference */ - if (PTR_ERR(skb) != -EINPROGRESS) + if (PTR_ERR(skb) != -EINPROGRESS) { macsec_rxsa_put(rx_sa); + macsec_rxsc_put(rx_sc); + } rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; @@ -1197,6 +1201,7 @@ deliver: if (rx_sa) macsec_rxsa_put(rx_sa); + macsec_rxsc_put(rx_sc); ret = gro_cells_receive(&macsec->gro_cells, skb); if (ret == NET_RX_SUCCESS) @@ -1212,6 +1217,7 @@ deliver: drop: macsec_rxsa_put(rx_sa); drop_nosa: + macsec_rxsc_put(rx_sc); rcu_read_unlock(); drop_direct: kfree_skb(skb); @@ -1646,7 +1652,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy); - if (IS_ERR(rx_sc) || !macsec_rxsc_get(rx_sc)) { + if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } @@ -3173,6 +3179,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; + dev_hold(real_dev); + /* need to be already registered so that ->init has run and * the MAC addr is set */ @@ -3201,8 +3209,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, macsec_generation++; - dev_hold(real_dev); - return 0; del_dev: diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index d94a978024d9..775674808249 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -345,10 +345,8 @@ static int xgene_mdio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); csr_base = devm_ioremap_resource(dev, res); - if (IS_ERR(csr_base)) { - dev_err(dev, "Unable to retrieve mac CSR region\n"); + if (IS_ERR(csr_base)) return PTR_ERR(csr_base); - } pdata->mac_csr_addr = csr_base; pdata->mdio_csr_addr = csr_base + BLOCK_XG_MDIO_CSR_OFFSET; pdata->diag_csr_addr = csr_base + BLOCK_DIAG_CSR_OFFSET; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 059f13b60fe0..1882d9828c99 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -869,7 +869,7 @@ static struct phy_driver ksphy_driver[] = { }, { .phy_id = PHY_ID_KSZ8001, .name = "Micrel KSZ8001 or KS8721", - .phy_id_mask = 0x00ffffff, + .phy_id_mask = 0x00fffffc, .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .driver_data = &ksz8041_type, @@ -993,7 +993,7 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK }, - { PHY_ID_KSZ8001, 0x00ffffff }, + { PHY_ID_KSZ8001, 0x00fffffc }, { PHY_ID_KS8737, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8021, 0x00ffffff }, { PHY_ID_KSZ8031, 0x00ffffff }, diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 2fc50ec453d0..6f044450b702 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -862,7 +862,7 @@ static int uhdlc_suspend(struct device *dev) static int uhdlc_resume(struct device *dev) { struct ucc_hdlc_private *priv = dev_get_drvdata(dev); - struct ucc_tdm *utdm = priv->utdm; + struct ucc_tdm *utdm; struct ucc_tdm_info *ut_info; struct ucc_fast __iomem *uf_regs; struct ucc_fast_private *uccf; @@ -877,6 +877,7 @@ static int uhdlc_resume(struct device *dev) if (!netif_running(priv->ndev)) return 0; + utdm = priv->utdm; ut_info = priv->ut_info; uf_info = &ut_info->uf_info; uf_regs = priv->uf_regs; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 355e1ae665f9..8f0fd41dfd4b 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -139,11 +139,11 @@ void ath10k_debug_print_hwfw_info(struct ath10k *ar) ar->id.subsystem_vendor, ar->id.subsystem_device); ath10k_info(ar, "kconfig debug %d debugfs %d tracing %d dfs %d testmode %d\n", - config_enabled(CONFIG_ATH10K_DEBUG), - config_enabled(CONFIG_ATH10K_DEBUGFS), - config_enabled(CONFIG_ATH10K_TRACING), - config_enabled(CONFIG_ATH10K_DFS_CERTIFIED), - config_enabled(CONFIG_NL80211_TESTMODE)); + IS_ENABLED(CONFIG_ATH10K_DEBUG), + IS_ENABLED(CONFIG_ATH10K_DEBUGFS), + IS_ENABLED(CONFIG_ATH10K_TRACING), + IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED), + IS_ENABLED(CONFIG_NL80211_TESTMODE)); firmware = ar->normal_mode_fw.fw_file.firmware; if (firmware) @@ -2424,7 +2424,7 @@ int ath10k_debug_register(struct ath10k *ar) debugfs_create_file("nf_cal_period", S_IRUSR | S_IWUSR, ar->debug.debugfs_phy, ar, &fops_nf_cal_period); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { debugfs_create_file("dfs_simulate_radar", S_IWUSR, ar->debug.debugfs_phy, ar, &fops_simulate_radar); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index fb8e38df9446..0bbd0a00edcc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3039,7 +3039,7 @@ static void ath10k_regd_update(struct ath10k *ar) regpair = ar->ath_common.regulatory.regpair; - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { nl_dfs_reg = ar->dfs_detector->region; wmi_dfs_reg = ath10k_mac_get_dfs_region(nl_dfs_reg); } else { @@ -3068,7 +3068,7 @@ static void ath10k_reg_notifier(struct wiphy *wiphy, ath_reg_notifier_apply(wiphy, request, &ar->ath_common.regulatory); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs region 0x%x\n", request->dfs_region); result = ar->dfs_detector->set_dfs_domain(ar->dfs_detector, @@ -7955,7 +7955,7 @@ int ath10k_mac_register(struct ath10k *ar) if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ar->hw->netdev_features = NETIF_F_HW_CSUM; - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { /* Init ath dfs pattern detector */ ar->ath_common.debug_mask = ATH_DBG_DFS; ar->dfs_detector = dfs_pattern_detector_init(&ar->ath_common, @@ -8003,7 +8003,7 @@ err_unregister: ieee80211_unregister_hw(ar->hw); err_dfs_detector_exit: - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); err_free: @@ -8018,7 +8018,7 @@ void ath10k_mac_unregister(struct ath10k *ar) { ieee80211_unregister_hw(ar->hw); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 169cd2e783eb..d2462886b75c 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3704,7 +3704,7 @@ void ath10k_wmi_event_dfs(struct ath10k *ar, phyerr->tsf_timestamp, tsf, buf_len); /* Skip event if DFS disabled */ - if (!config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) + if (!IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) return; ATH10K_DFS_STAT_INC(ar, pulses_total); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 4ad6284fc37d..72e2ec67768d 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3881,7 +3881,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) BIT(NL80211_IFTYPE_P2P_CLIENT); } - if (config_enabled(CONFIG_ATH6KL_REGDOMAIN) && + if (IS_ENABLED(CONFIG_ATH6KL_REGDOMAIN) && test_bit(ATH6KL_FW_CAPABILITY_REGDOMAIN, ar->fw_capabilities)) { wiphy->reg_notifier = ath6kl_cfg80211_reg_notify; ar->wiphy->features |= NL80211_FEATURE_CELL_BASE_REG_HINTS; diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index a8762711ad74..e2512d5bc0e1 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -731,7 +731,7 @@ void ath9k_cmn_spectral_scan_trigger(struct ath_common *common, struct ath_hw *ah = spec_priv->ah; u32 rxfilter; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; if (!ath9k_hw_ops(ah)->spectral_scan_trigger) { @@ -806,7 +806,7 @@ static ssize_t write_file_spec_scan_ctl(struct file *file, char buf[32]; ssize_t len; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; len = min(count, sizeof(buf) - 1); @@ -1072,7 +1072,7 @@ static struct rchan_callbacks rfs_spec_scan_cb = { void ath9k_cmn_spectral_deinit_debug(struct ath_spec_scan_priv *spec_priv) { - if (config_enabled(CONFIG_ATH9K_DEBUGFS)) { + if (IS_ENABLED(CONFIG_ATH9K_DEBUGFS)) { relay_close(spec_priv->rfs_chan_spec_scan); spec_priv->rfs_chan_spec_scan = NULL; } diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index edc74fca60aa..cfa3fe82ade3 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -843,7 +843,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_P2P_GO_CTWIN; - if (!config_enabled(CONFIG_ATH9K_TX99)) { + if (!IS_ENABLED(CONFIG_ATH9K_TX99)) { hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_P2P_CLIENT) | diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 7594650f214f..a394622c9022 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1250,7 +1250,7 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); - if (config_enabled(CONFIG_ATH9K_TX99)) { + if (IS_ENABLED(CONFIG_ATH9K_TX99)) { if (sc->cur_chan->nvifs >= 1) { mutex_unlock(&sc->mutex); return -EOPNOTSUPP; @@ -1300,7 +1300,7 @@ static int ath9k_change_interface(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); - if (config_enabled(CONFIG_ATH9K_TX99)) { + if (IS_ENABLED(CONFIG_ATH9K_TX99)) { mutex_unlock(&sc->mutex); return -EOPNOTSUPP; } @@ -1360,7 +1360,7 @@ static void ath9k_enable_ps(struct ath_softc *sc) struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; sc->ps_enabled = true; @@ -1379,7 +1379,7 @@ static void ath9k_disable_ps(struct ath_softc *sc) struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; sc->ps_enabled = false; @@ -1953,7 +1953,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, struct ieee80211_channel *chan; int pos; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; spin_lock_bh(&common->cc_lock); @@ -2003,7 +2003,7 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; mutex_lock(&sc->mutex); diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 32160fca876a..669734252664 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -377,7 +377,7 @@ u32 ath_calcrxfilter(struct ath_softc *sc) struct ath_common *common = ath9k_hw_common(sc->sc_ah); u32 rfilt; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return 0; rfilt = ATH9K_RX_FILTER_UCAST | ATH9K_RX_FILTER_BCAST diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 2303ef96299d..2f8136d50f78 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -352,7 +352,7 @@ dfs_pattern_detector_init(struct ath_common *common, { struct dfs_pattern_detector *dpd; - if (!config_enabled(CONFIG_CFG80211_CERTIFICATION_ONUS)) + if (!IS_ENABLED(CONFIG_CFG80211_CERTIFICATION_ONUS)) return NULL; dpd = kmalloc(sizeof(*dpd), GFP_KERNEL); diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index 7e15ed9ed31f..f8506037736f 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -116,7 +116,7 @@ static const struct ieee80211_regdomain ath_world_regdom_67_68_6A_6C = { static bool dynamic_country_user_possible(struct ath_regulatory *reg) { - if (config_enabled(CONFIG_ATH_REG_DYNAMIC_USER_CERT_TESTING)) + if (IS_ENABLED(CONFIG_ATH_REG_DYNAMIC_USER_CERT_TESTING)) return true; switch (reg->country_code) { @@ -188,7 +188,7 @@ static bool dynamic_country_user_possible(struct ath_regulatory *reg) static bool ath_reg_dyn_country_user_allow(struct ath_regulatory *reg) { - if (!config_enabled(CONFIG_ATH_REG_DYNAMIC_USER_REG_HINTS)) + if (!IS_ENABLED(CONFIG_ATH_REG_DYNAMIC_USER_REG_HINTS)) return false; if (!dynamic_country_user_possible(reg)) return false; diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 40d04ef5da9e..0d5c29ae51de 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { struct intel_ntb_dev *ndev; + struct pci_dev *pdev; void __iomem *mmio; char *buf; size_t buf_size; ssize_t ret, off; - union { u64 v64; u32 v32; u16 v16; } u; + union { u64 v64; u32 v32; u16 v16; u8 v8; } u; ndev = filp->private_data; + pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; buf_size = min(count, 0x800ul); @@ -632,6 +634,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Doorbell Bell -\t\t%#llx\n", u.v64); off += scnprintf(buf + off, buf_size - off, + "\nNTB Window Size:\n"); + + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR5SZ %hhu\n", u.v8); + } + + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR5SZ %hhu\n", u.v8); + } + + off += scnprintf(buf + off, buf_size - off, "\nNTB Incoming XLAT:\n"); u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2)); @@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "LMT45 -\t\t\t%#018llx\n", u.v64); } - if (pdev_is_xeon(ndev->ntb.pdev)) { + if (pdev_is_xeon(pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { off += scnprintf(buf + off, buf_size - off, "\nNTB Outgoing B2B XLAT:\n"); @@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "\nXEON NTB Hardware Errors:\n"); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_DEVSTS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "DEVSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_LINK_STATUS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "LNKSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_UNCERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "UNCERRSTS -\t\t%#06x\n", u.v32); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_CORERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "CORERRSTS -\t\t%#06x\n", u.v32); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 2ef9d9130864..d5c5894f252e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -153,6 +153,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + unsigned int rx_alloc_entry; dma_cookie_t last_cookie; struct tasklet_struct rxc_db_work; @@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n", qp->rx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, { struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_mw *mw; + struct ntb_dev *ndev = nt->ndev; + struct ntb_queue_entry *entry; unsigned int rx_size, num_qps_mw; unsigned int mw_num, mw_count, qp_count; unsigned int i; + int node; mw_count = nt->mw_count; qp_count = nt->qp_count; @@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_index = 0; + /* + * Checking to see if we have more entries than the default. + * We should add additional entries if that is the case so we + * can be in sync with the transport frames. + */ + node = dev_to_node(&ndev->dev); + for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) { + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); + if (!entry) + return -ENOMEM; + + entry->qp = qp; + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, + &qp->rx_free_q); + qp->rx_alloc_entry++; + } + qp->remote_rx_info->entry = qp->rx_max_entry - 1; /* setup the hdr offsets with 0's */ @@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; + mw_count = ntb_mw_count(ndev); + if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) { + dev_err(&ndev->dev, "Not enough scratch pad registers for %s", + NTB_TRANSPORT_NAME); + return -EIO; + } + if (ntb_db_is_unsafe(ndev)) dev_dbg(&ndev->dev, "doorbell is unsafe, proceed anyway...\n"); @@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->ndev = ndev; - mw_count = ntb_mw_count(ndev); - nt->mw_count = mw_count; nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), @@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } + qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES; - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + for (i = 0; i < qp->tx_max_entry; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1744,6 +1773,7 @@ err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: + qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->tx_dma_chan) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8dfce9c9aad0..6a50f20bf1cd 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/ntb.h> +#include <linux/mutex.h> #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); static struct dentry *perf_debugfs_dir; +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); @@ -117,6 +122,10 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; + int status; + u64 copied; + u64 diff_us; }; struct perf_ctx { @@ -124,23 +133,23 @@ struct perf_ctx { spinlock_t db_lock; struct perf_mw mw; bool link_is_up; - struct work_struct link_cleanup; struct delayed_work link_work; + wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { VERSION = 0, MW_SZ_HIGH, MW_SZ_LOW, - SPAD_MSG, - SPAD_ACK, MAX_SPAD }; @@ -148,10 +157,16 @@ static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) + if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); - else - schedule_work(&perf->link_cleanup); + } else { + dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + + if (!perf->link_is_up) + cancel_delayed_work_sync(&perf->link_work); + + perf->link_is_up = false; + } } static void perf_db_event(void *ctx, int vec) @@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, char __iomem *tmp = dst; u64 perf, diff_us; ktime_t kstart, kstop, kdiff; + unsigned long last_sleep = jiffies; chunks = div64_u64(win_size, buf_size); total_chunks = div64_u64(total, buf_size); @@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } else tmp += buf_size; - /* Probably should schedule every 4GB to prevent soft hang. */ - if (((copied % SZ_4G) == 0) && !use_dma) { + /* Probably should schedule every 5s to prevent soft hang. */ + if (unlikely((jiffies - last_sleep) > 5 * HZ)) { + last_sleep = jiffies; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { - pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + pr_debug("%s: All DMA descriptors submitted\n", current->comm); + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); kdiff = ktime_sub(kstop, kstart); diff_us = ktime_to_us(kdiff); - pr_info("%s: copied %llu bytes\n", current->comm, copied); + pr_debug("%s: copied %llu bytes\n", current->comm, copied); - pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); + pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); perf = div64_u64(copied, diff_us); - pr_info("%s: MBytes/s: %llu\n", current->comm, perf); + pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + + pctx->copied = copied; + pctx->diff_us = diff_us; return 0; } @@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data) int rc, node, i; struct dma_chan *dma_chan = NULL; - pr_info("kthread %s starting...\n", current->comm); + pr_debug("kthread %s starting...\n", current->comm); node = dev_to_node(&pdev->dev); @@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -402,6 +431,16 @@ err: pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return rc; } @@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work) dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; + + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); @@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work) goto out1; perf->link_is_up = true; + wake_up(&perf->link_wq); return; @@ -508,18 +552,6 @@ out: msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); } -static void perf_link_cleanup(struct work_struct *work) -{ - struct perf_ctx *perf = container_of(work, - struct perf_ctx, - link_cleanup); - - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); - - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); -} - static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; @@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, { struct perf_ctx *perf = filp->private_data; char *buf; - ssize_t ret, out_offset; + ssize_t ret, out_off = 0; + struct pthr_ctx *pctx; + int i; + u64 rate; if (!perf) return 0; - buf = kmalloc(64, GFP_KERNEL); + buf = kmalloc(1024, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + + if (mutex_is_locked(&perf->run_mutex)) { + out_off = snprintf(buf, 64, "running\n"); + goto read_from_buf; + } + + for (i = 0; i < MAX_THREADS; i++) { + pctx = &perf->pthr_ctx[i]; + + if (pctx->status == -ENODATA) + break; + + if (pctx->status) { + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: error %d\n", i, + pctx->status); + continue; + } + + rate = div64_u64(pctx->copied, pctx->diff_us); + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", + i, pctx->copied, pctx->diff_us, rate); + } + +read_from_buf: + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); kfree(buf); return ret; @@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { - kthread_stop(pctx->thread); + pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } } +static void perf_clear_thread_status(struct perf_ctx *perf) +{ + int i; + + for (i = 0; i < MAX_THREADS; i++) + perf->pthr_ctx[i].status = -ENODATA; +} + static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); - if (!perf->link_is_up) - return 0; + if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; + perf_clear_thread_status(perf); - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); + } - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix run_order to %u\n", run_order); + } - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + node = dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } } + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) int node; int rc = 0; + if (ntb_spad_count(ntb) < MAX_SPAD) { + dev_err(&ntb->dev, "Not enough scratch pad registers for %s", + DRIVER_NAME); + return -EIO; + } + node = dev_to_node(&pdev->dev); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); @@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); + init_waitqueue_head(&perf->link_wq); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); - INIT_WORK(&perf->link_cleanup, perf_link_cleanup); rc = ntb_set_ctx(ntb, perf, &perf_ops); if (rc) @@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) if (rc) goto err_ctx; + perf_clear_thread_status(perf); + return 0; err_ctx: cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); kfree(perf); err_perf: return rc; @@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); ntb_clear_ctx(ntb); ntb_link_disable(ntb); diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index fe1600566981..7d311799fca1 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -61,6 +61,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/debugfs.h> #include <linux/ntb.h> @@ -96,8 +97,13 @@ struct pp_ctx { spinlock_t db_lock; struct timer_list db_timer; unsigned long db_delay; + struct dentry *debugfs_node_dir; + struct dentry *debugfs_count; + atomic_t count; }; +static struct dentry *pp_debugfs_dir; + static void pp_ping(unsigned long ctx) { struct pp_ctx *pp = (void *)ctx; @@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec) dev_dbg(&pp->ntb->dev, "Pong vec %d bits %#llx\n", vec, db_bits); + atomic_inc(&pp->count); } spin_unlock_irqrestore(&pp->db_lock, irqflags); } +static int pp_debugfs_setup(struct pp_ctx *pp) +{ + struct pci_dev *pdev = pp->ntb->pdev; + + if (!pp_debugfs_dir) + return -ENODEV; + + pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev), + pp_debugfs_dir); + if (!pp->debugfs_node_dir) + return -ENODEV; + + pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR, + pp->debugfs_node_dir, + &pp->count); + if (!pp->debugfs_count) + return -ENODEV; + + return 0; +} + static const struct ntb_ctx_ops pp_ops = { .link_event = pp_link_event, .db_event = pp_db_event, @@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client, pp->ntb = ntb; pp->db_bits = 0; + atomic_set(&pp->count, 0); spin_lock_init(&pp->db_lock); setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); pp->db_delay = msecs_to_jiffies(delay_ms); @@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client, if (rc) goto err_ctx; + rc = pp_debugfs_setup(pp); + if (rc) + goto err_ctx; + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); ntb_link_event(ntb); @@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client, { struct pp_ctx *pp = ntb->ctx; + debugfs_remove_recursive(pp->debugfs_node_dir); + ntb_clear_ctx(ntb); del_timer_sync(&pp->db_timer); ntb_link_disable(ntb); @@ -247,4 +282,29 @@ static struct ntb_client pp_client = { .remove = pp_remove, }, }; -module_ntb_client(pp_client); + +static int __init pp_init(void) +{ + int rc; + + if (debugfs_initialized()) + pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&pp_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(pp_debugfs_dir); + return rc; +} +module_init(pp_init); + +static void __exit pp_exit(void) +{ + ntb_unregister_client(&pp_client); + debugfs_remove_recursive(pp_debugfs_dir); +} +module_exit(pp_exit); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 6f5dc6ca673d..61bf2ef87e0e 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -59,6 +59,12 @@ * * Eg: check if clearing the doorbell mask generates an interrupt. * + * # Check the link status + * root@self# cat $DBG_DIR/link + * + * # Block until the link is up + * root@self# echo Y > $DBG_DIR/link_event + * * # Set the doorbell mask * root@self# echo 's 1' > $DBG_DIR/mask * @@ -79,6 +85,13 @@ * root@self# cat $DBG_DIR/spad * * Observe that spad 0 and 1 have the values set by the peer. + * + * # Check the memory window translation info + * cat $DBG_DIR/peer_trans0 + * + * # Setup a 16k memory window buffer + * echo 16384 > $DBG_DIR/peer_trans0 + * */ #include <linux/init.h> @@ -89,6 +102,7 @@ #include <linux/dma-mapping.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/ntb.h> @@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +#define MAX_MWS 16 + static struct dentry *tool_dbgfs; +struct tool_mw { + int idx; + struct tool_ctx *tc; + resource_size_t win_size; + resource_size_t size; + u8 __iomem *local; + u8 *peer; + dma_addr_t peer_dma; + struct dentry *peer_dbg_file; +}; + struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; + wait_queue_head_t link_wq; + int mw_count; + struct tool_mw mws[MAX_MWS]; }; #define SPAD_FNAME_SIZE 0x10 @@ -135,6 +165,8 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? "up" : "down", speed, width); + + wake_up(&tc->link_wq); } static void tool_db_event(void *ctx, int vec) @@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, if (!spad_read_fn) return -EINVAL; - buf_size = min_t(size_t, size, 0x100); + spad_count = ntb_spad_count(tc->ntb); + + /* + * We multiply the number of spads by 15 to get the buffer size + * this is from 3 for the %d, 10 for the largest hex value + * (0x00000000) and 2 for the tab and line feed. + */ + buf_size = min_t(size_t, size, spad_count * 15); buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) @@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, pos = 0; - spad_count = ntb_spad_count(tc->ntb); for (i = 0; i < spad_count; ++i) { pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", i, spad_read_fn(tc->ntb, i)); @@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, { int spad_idx; u32 spad_val; - char *buf; + char *buf, *buf_ptr; int pos, n; ssize_t rc; @@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, } buf[size] = 0; - - n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + buf_ptr = buf; + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); while (n == 2) { + buf_ptr += pos; rc = spad_write_fn(tc->ntb, spad_idx, spad_val); if (rc) break; - n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); } if (n < 0) @@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, tool_peer_spad_read, tool_peer_spad_write); +static ssize_t tool_link_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[3]; + + buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + + return simple_read_from_buffer(ubuf, size, offp, buf, 2); +} + +static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (val) + rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + else + rc = ntb_link_disable(tc->ntb); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_fops, + tool_link_read, + tool_link_write); + +static ssize_t tool_link_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (wait_event_interruptible(tc->link_wq, + ntb_link_is_up(tc->ntb, NULL, NULL) == val)) + return -ERESTART; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_event_fops, + NULL, + tool_link_event_write); + +static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (mw->local == NULL) + return -EIO; + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy_fromio(buf, mw->local + pos, size); + rc = copy_to_user(ubuf, buf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + +err_free: + kfree(buf); + + return rc; +} + +static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = copy_from_user(buf, ubuf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + + memcpy_toio(mw->local + pos, buf, size); + +err_free: + kfree(buf); + + return rc; +} + +static TOOL_FOPS_RDWR(tool_mw_fops, + tool_mw_read, + tool_mw_write); + +static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); +} + +static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); +} + +static TOOL_FOPS_RDWR(tool_peer_mw_fops, + tool_peer_mw_read, + tool_peer_mw_write); + +static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size) +{ + int rc; + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + resource_size_t size, align, align_size; + char buf[16]; + + if (mw->peer) + return 0; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, + &align_size); + if (rc) + return rc; + + mw->size = min_t(resource_size_t, req_size, size); + mw->size = round_up(mw->size, align); + mw->size = round_up(mw->size, align_size); + mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, + &mw->peer_dma, GFP_KERNEL); + + if (!mw->peer) + return -ENOMEM; + + rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); + if (rc) + goto err_free_dma; + + snprintf(buf, sizeof(buf), "peer_mw%d", idx); + mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR, + mw->tc->dbgfs, mw, + &tool_peer_mw_fops); + + return 0; + +err_free_dma: + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + mw->peer = NULL; + mw->peer_dma = 0; + mw->size = 0; + + return rc; +} + +static void tool_free_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + + if (mw->peer) { + ntb_mw_clear_trans(tc->ntb, idx); + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + } + + mw->peer = NULL; + mw->peer_dma = 0; + + debugfs_remove(mw->peer_dbg_file); + + mw->peer_dbg_file = NULL; +} + +static ssize_t tool_peer_mw_trans_read(struct file *filep, + char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char *buf; + size_t buf_size; + ssize_t ret, off = 0; + + phys_addr_t base; + resource_size_t mw_size; + resource_size_t align; + resource_size_t align_size; + + buf_size = min_t(size_t, size, 512); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ntb_mw_get_range(mw->tc->ntb, mw->idx, + &base, &mw_size, &align, &align_size); + + off += scnprintf(buf + off, buf_size - off, + "Peer MW %d Information:\n", mw->idx); + + off += scnprintf(buf + off, buf_size - off, + "Physical Address \t%pa[p]\n", + &base); + + off += scnprintf(buf + off, buf_size - off, + "Window Size \t%lld\n", + (unsigned long long)mw_size); + + off += scnprintf(buf + off, buf_size - off, + "Alignment \t%lld\n", + (unsigned long long)align); + + off += scnprintf(buf + off, buf_size - off, + "Size Alignment \t%lld\n", + (unsigned long long)align_size); + + off += scnprintf(buf + off, buf_size - off, + "Ready \t%c\n", + (mw->peer) ? 'Y' : 'N'); + + off += scnprintf(buf + off, buf_size - off, + "Allocated Size \t%zd\n", + (mw->peer) ? (size_t)mw->size : 0); + + ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + kfree(buf); + return ret; +} + +static ssize_t tool_peer_mw_trans_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char buf[32]; + size_t buf_size; + unsigned long long val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + tool_free_mw(mw->tc, mw->idx); + if (val) + rc = tool_setup_mw(mw->tc, mw->idx, val); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops, + tool_peer_mw_trans_read, + tool_peer_mw_trans_write); + +static int tool_init_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + int rc; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size, + NULL, NULL); + if (rc) + return rc; + + mw->tc = tc; + mw->idx = idx; + mw->local = ioremap_wc(base, mw->win_size); + if (!mw->local) + return -EFAULT; + + return 0; +} + +static void tool_free_mws(struct tool_ctx *tc) +{ + int i; + + for (i = 0; i < tc->mw_count; i++) { + tool_free_mw(tc, i); + + if (tc->mws[i].local) + iounmap(tc->mws[i].local); + + tc->mws[i].local = NULL; + } +} + static void tool_setup_dbgfs(struct tool_ctx *tc) { + int i; + /* This modules is useless without dbgfs... */ if (!tool_dbgfs) { tc->dbgfs = NULL; @@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); + + debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_link_fops); + + debugfs_create_file("link_event", S_IWUSR, tc->dbgfs, + tc, &tool_link_event_fops); + + for (i = 0; i < tc->mw_count; i++) { + char buf[30]; + + snprintf(buf, sizeof(buf), "mw%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_mw_fops); + + snprintf(buf, sizeof(buf), "peer_trans%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_peer_mw_trans_fops); + } } static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc; int rc; + int i; if (ntb_db_is_unsafe(ntb)) dev_dbg(&ntb->dev, "doorbell is unsafe\n"); @@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) if (ntb_spad_is_unsafe(ntb)) dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - tc = kmalloc(sizeof(*tc), GFP_KERNEL); + tc = kzalloc(sizeof(*tc), GFP_KERNEL); if (!tc) { rc = -ENOMEM; goto err_tc; } tc->ntb = ntb; + init_waitqueue_head(&tc->link_wq); + + tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + for (i = 0; i < tc->mw_count; i++) { + rc = tool_init_mw(tc, i); + if (rc) + goto err_ctx; + } tool_setup_dbgfs(tc); @@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) return 0; err_ctx: + tool_free_mws(tc); debugfs_remove_recursive(tc->dbgfs); kfree(tc); err_tc: @@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; + tool_free_mws(tc); + ntb_clear_ctx(ntb); ntb_link_disable(ntb); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9dce03f420eb..7cf3bdfaf809 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1133,11 +1133,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - int rw, sector_t sector) + int op, sector_t sector) { int ret; - if (rw == READ) { + if (!op_is_write(op)) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1155,7 +1155,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) struct bvec_iter iter; unsigned long start; struct bio_vec bvec; - int err = 0, rw; + int err = 0; bool do_acct; /* @@ -1170,7 +1170,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } do_acct = nd_iostat_start(bio, &start); - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; @@ -1181,11 +1180,12 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) BUG_ON(len % btt->sector_size); err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - rw, iter.bi_sector); + bio_op(bio), iter.bi_sector); if (err) { dev_info(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", - (rw == READ) ? "READ" : "WRITE", + (op_is_write(bio_op(bio))) ? "WRITE" : + "READ", (unsigned long long) iter.bi_sector, len); bio->bi_error = err; break; @@ -1200,12 +1200,12 @@ out: } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct btt *btt = bdev->bd_disk->private_data; - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, 0); + btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op, 0); return 0; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b511099457db..d64d92481c1d 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -67,7 +67,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, } static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, int op, sector_t sector) { int rc = 0; @@ -79,7 +79,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (rw == READ) { + if (!op_is_write(op)) { if (unlikely(bad_pmem)) rc = -EIO; else { @@ -134,7 +134,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, bio_data_dir(bio), + bvec.bv_offset, bio_op(bio), iter.bi_sector); if (rc) { bio->bi_error = rc; @@ -152,12 +152,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct pmem_device *pmem = bdev->bd_queue->queuedata; int rc; - rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); + rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, op, sector); /* * The ->rw_page interface is subtle and tricky. The core @@ -166,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, rw & WRITE, 0); + page_endio(page, op, 0); return rc; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4cb9b156cab7..d7c33f9361aa 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1661,14 +1661,9 @@ static int nvme_pci_enable(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); - int bars; - if (dev->bar) iounmap(dev->bar); - - bars = pci_select_bars(pdev, IORESOURCE_MEM); - pci_release_selected_regions(pdev, bars); + pci_release_mem_regions(to_pci_dev(dev->dev)); } static void nvme_pci_disable(struct nvme_dev *dev) @@ -1897,13 +1892,9 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { static int nvme_dev_map(struct nvme_dev *dev) { - int bars; struct pci_dev *pdev = to_pci_dev(dev->dev); - bars = pci_select_bars(pdev, IORESOURCE_MEM); - if (!bars) - return -ENODEV; - if (pci_request_selected_regions(pdev, bars, "nvme")) + if (pci_request_mem_regions(pdev, "nvme")) return -ENODEV; dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); @@ -1912,7 +1903,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return 0; release: - pci_release_selected_regions(pdev, bars); + pci_release_mem_regions(pdev); return -ENODEV; } diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 765390e3ed8d..8aa197691074 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -499,8 +499,24 @@ EXPORT_SYMBOL_GPL(of_platform_default_populate); static int __init of_platform_default_populate_init(void) { - if (of_have_populated_dt()) - of_platform_default_populate(NULL, NULL, NULL); + struct device_node *node; + + if (!of_have_populated_dt()) + return -ENODEV; + + /* + * Handle ramoops explicitly, since it is inside /reserved-memory, + * which lacks a "compatible" property. + */ + node = of_find_node_by_path("/reserved-memory"); + if (node) { + node = of_find_compatible_node(node, NULL, "ramoops"); + if (node) + of_platform_device_create(node, NULL, NULL); + } + + /* Populate everything else. */ + of_platform_default_populate(NULL, NULL, NULL); return 0; } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index e24b05996a1b..3ed6238f8f6e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -790,7 +790,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, static dma_addr_t ccio_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return ccio_map_single(dev, page_address(page) + offset, size, direction); @@ -806,7 +806,7 @@ ccio_map_page(struct device *dev, struct page *page, unsigned long offset, */ static void ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; unsigned long flags; @@ -844,7 +844,7 @@ ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size, */ static void * ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; #if 0 @@ -878,9 +878,9 @@ ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, */ static void ccio_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { - ccio_unmap_page(dev, dma_handle, size, 0, NULL); + ccio_unmap_page(dev, dma_handle, size, 0, 0); free_pages((unsigned long)cpu_addr, get_order(size)); } @@ -907,7 +907,7 @@ ccio_free(struct device *dev, size_t size, void *cpu_addr, */ static int ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -984,7 +984,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, */ static void ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; @@ -1004,7 +1004,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; #endif ccio_unmap_page(dev, sg_dma_address(sglist), - sg_dma_len(sglist), direction, NULL); + sg_dma_len(sglist), direction, 0); ++sglist; } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 42ec4600b7e4..151b86b6d2e2 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -783,7 +783,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, static dma_addr_t sba_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return sba_map_single(dev, page_address(page) + offset, size, direction); @@ -801,7 +801,7 @@ sba_map_page(struct device *dev, struct page *page, unsigned long offset, */ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -876,7 +876,7 @@ sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, * See Documentation/DMA-API-HOWTO.txt */ static void *sba_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, unsigned long attrs) { void *ret; @@ -908,9 +908,9 @@ static void *sba_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle */ static void sba_free(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { - sba_unmap_page(hwdev, dma_handle, size, 0, NULL); + sba_unmap_page(hwdev, dma_handle, size, 0, 0); free_pages((unsigned long) vaddr, get_order(size)); } @@ -943,7 +943,7 @@ int dump_run_sg = 0; */ static int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1026,7 +1026,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, */ static void sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; #ifdef ASSERT_PDIR_SANITY @@ -1051,7 +1051,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, while (sg_dma_len(sglist) && nents--) { sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), - direction, NULL); + direction, 0); #ifdef SBA_COLLECT_STATS ioc->usg_pages += ((sg_dma_address(sglist) & ~IOVP_MASK) + sg_dma_len(sglist) + IOVP_SIZE - 1) >> PAGE_SHIFT; ioc->usingle_calls--; /* kluge since call is unmap_sg() */ diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 56389be5d08b..67f9916ff14d 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -25,7 +25,7 @@ config PCI_MSI If you don't know what to do here, say Y. config PCI_MSI_IRQ_DOMAIN - bool + def_bool ARM || ARM64 || X86 depends on PCI_MSI select GENERIC_MSI_IRQ_DOMAIN diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index dd7cdbee8029..c288e5a52575 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -91,6 +91,35 @@ void pci_bus_remove_resources(struct pci_bus *bus) } } +int devm_request_pci_bus_resources(struct device *dev, + struct list_head *resources) +{ + struct resource_entry *win; + struct resource *parent, *res; + int err; + + resource_list_for_each_entry(win, resources) { + res = win->res; + switch (resource_type(res)) { + case IORESOURCE_IO: + parent = &ioport_resource; + break; + case IORESOURCE_MEM: + parent = &iomem_resource; + break; + default: + continue; + } + + err = devm_request_resource(dev, parent, res); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources); + static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; #ifdef CONFIG_PCI_BUS_ADDR_T_64BIT static struct pci_bus_region pci_64_bit = {0, @@ -291,6 +320,7 @@ void pci_bus_add_device(struct pci_dev *dev) pci_fixup_device(pci_fixup_final, dev); pci_create_sysfs_dev_files(dev); pci_proc_attach_device(dev); + pci_bridge_d3_device_changed(dev); dev->match_driver = true; retval = device_attach(&dev->dev); @@ -397,4 +427,3 @@ void pci_bus_put(struct pci_bus *bus) put_device(&bus->dev); } EXPORT_SYMBOL(pci_bus_put); - diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index f9832ad8efe2..43ed08dd8b01 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -19,16 +19,15 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/pci-ecam.h> #include <linux/slab.h> -#include "ecam.h" - /* * On 64-bit systems, we do a single ioremap for the whole config space * since we have enough virtual address range available. On 32-bit, we * ioremap the config space for each bus individually. */ -static const bool per_bus_mapping = !config_enabled(CONFIG_64BIT); +static const bool per_bus_mapping = !IS_ENABLED(CONFIG_64BIT); /* * Create a PCI config space window @@ -52,6 +51,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, if (!cfg) return ERR_PTR(-ENOMEM); + cfg->parent = dev; cfg->ops = ops; cfg->busr.start = busr->start; cfg->busr.end = busr->end; @@ -95,7 +95,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, } if (ops->init) { - err = ops->init(dev, cfg); + err = ops->init(cfg); if (err) goto err_exit; } diff --git a/drivers/pci/ecam.h b/drivers/pci/ecam.h deleted file mode 100644 index 9878bebd45bb..000000000000 --- a/drivers/pci/ecam.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2016 Broadcom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation (the "GPL"). - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 (GPLv2) for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 (GPLv2) along with this source code. - */ -#ifndef DRIVERS_PCI_ECAM_H -#define DRIVERS_PCI_ECAM_H - -#include <linux/kernel.h> -#include <linux/platform_device.h> - -/* - * struct to hold pci ops and bus shift of the config window - * for a PCI controller. - */ -struct pci_config_window; -struct pci_ecam_ops { - unsigned int bus_shift; - struct pci_ops pci_ops; - int (*init)(struct device *, - struct pci_config_window *); -}; - -/* - * struct to hold the mappings of a config space window. This - * is expected to be used as sysdata for PCI controllers that - * use ECAM. - */ -struct pci_config_window { - struct resource res; - struct resource busr; - void *priv; - struct pci_ecam_ops *ops; - union { - void __iomem *win; /* 64-bit single mapping */ - void __iomem **winp; /* 32-bit per-bus mapping */ - }; -}; - -/* create and free pci_config_window */ -struct pci_config_window *pci_ecam_create(struct device *dev, - struct resource *cfgres, struct resource *busr, - struct pci_ecam_ops *ops); -void pci_ecam_free(struct pci_config_window *cfg); - -/* map_bus when ->sysdata is an instance of pci_config_window */ -void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn, - int where); -/* default ECAM ops */ -extern struct pci_ecam_ops pci_generic_ecam_ops; - -#ifdef CONFIG_PCI_HOST_GENERIC -/* for DT-based PCI controllers that support ECAM */ -int pci_host_common_probe(struct platform_device *pdev, - struct pci_ecam_ops *ops); -#endif -#endif diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 5d2374e4ee7f..9b485d873b0d 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -3,8 +3,9 @@ menu "PCI host controller drivers" config PCI_DRA7XX bool "TI DRA7xx PCIe controller" - select PCIE_DW depends on OF && HAS_IOMEM && TI_PIPE3 + depends on PCI_MSI_IRQ_DOMAIN + select PCIE_DW help Enables support for the PCIe controller in the DRA7xx SoC. There are two instances of PCIe controller in DRA7xx. This controller can @@ -16,11 +17,20 @@ config PCI_MVEBU depends on ARM depends on OF +config PCI_AARDVARK + bool "Aardvark PCIe controller" + depends on ARCH_MVEBU && ARM64 + depends on OF + depends on PCI_MSI_IRQ_DOMAIN + help + Add support for Aardvark 64bit PCIe Host Controller. This + controller is part of the South Bridge of the Marvel Armada + 3700 SoC. config PCIE_XILINX_NWL bool "NWL PCIe Core" depends on ARCH_ZYNQMP - select PCI_MSI_IRQ_DOMAIN if PCI_MSI + depends on PCI_MSI_IRQ_DOMAIN help Say 'Y' here if you want kernel support for Xilinx NWL PCIe controller. The controller can act as Root Port @@ -29,6 +39,7 @@ config PCIE_XILINX_NWL config PCIE_DW_PLAT bool "Platform bus based DesignWare PCIe Controller" + depends on PCI_MSI_IRQ_DOMAIN select PCIE_DW ---help--- This selects the DesignWare PCIe controller support. Select this if @@ -40,16 +51,19 @@ config PCIE_DW_PLAT config PCIE_DW bool + depends on PCI_MSI_IRQ_DOMAIN config PCI_EXYNOS bool "Samsung Exynos PCIe controller" depends on SOC_EXYNOS5440 + depends on PCI_MSI_IRQ_DOMAIN select PCIEPORTBUS select PCIE_DW config PCI_IMX6 bool "Freescale i.MX6 PCIe controller" depends on SOC_IMX6Q + depends on PCI_MSI_IRQ_DOMAIN select PCIEPORTBUS select PCIE_DW @@ -72,8 +86,7 @@ config PCI_RCAR_GEN2 config PCIE_RCAR bool "Renesas R-Car PCIe controller" depends on ARCH_RENESAS || (ARM && COMPILE_TEST) - select PCI_MSI - select PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI_IRQ_DOMAIN help Say Y here if you want PCIe controller support on R-Car SoCs. @@ -85,6 +98,7 @@ config PCI_HOST_GENERIC bool "Generic PCI host controller" depends on (ARM || ARM64) && OF select PCI_HOST_COMMON + select IRQ_DOMAIN help Say Y here if you want to support a simple generic PCI host controller, such as the one emulated by kvmtool. @@ -92,6 +106,7 @@ config PCI_HOST_GENERIC config PCIE_SPEAR13XX bool "STMicroelectronics SPEAr PCIe controller" depends on ARCH_SPEAR13XX + depends on PCI_MSI_IRQ_DOMAIN select PCIEPORTBUS select PCIE_DW help @@ -100,6 +115,7 @@ config PCIE_SPEAR13XX config PCI_KEYSTONE bool "TI Keystone PCIe controller" depends on ARCH_KEYSTONE + depends on PCI_MSI_IRQ_DOMAIN select PCIE_DW select PCIEPORTBUS help @@ -120,7 +136,6 @@ config PCI_XGENE depends on ARCH_XGENE depends on OF select PCIEPORTBUS - select PCI_MSI_IRQ_DOMAIN if PCI_MSI help Say Y here if you want internal PCI support on APM X-Gene SoC. There are 5 internal PCIe ports available. Each port is GEN3 capable @@ -128,7 +143,8 @@ config PCI_XGENE config PCI_XGENE_MSI bool "X-Gene v1 PCIe MSI feature" - depends on PCI_XGENE && PCI_MSI + depends on PCI_XGENE + depends on PCI_MSI_IRQ_DOMAIN default y help Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC. @@ -137,6 +153,7 @@ config PCI_XGENE_MSI config PCI_LAYERSCAPE bool "Freescale Layerscape PCIe controller" depends on OF && (ARM || ARCH_LAYERSCAPE) + depends on PCI_MSI_IRQ_DOMAIN select PCIE_DW select MFD_SYSCON help @@ -177,8 +194,7 @@ config PCIE_IPROC_BCMA config PCIE_IPROC_MSI bool "Broadcom iProc PCIe MSI support" depends on PCIE_IPROC_PLATFORM || PCIE_IPROC_BCMA - depends on PCI_MSI - select PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI_IRQ_DOMAIN default ARCH_BCM_IPROC help Say Y here if you want to enable MSI support for Broadcom's iProc @@ -195,8 +211,8 @@ config PCIE_ALTERA config PCIE_ALTERA_MSI bool "Altera PCIe MSI feature" - depends on PCIE_ALTERA && PCI_MSI - select PCI_MSI_IRQ_DOMAIN + depends on PCIE_ALTERA + depends on PCI_MSI_IRQ_DOMAIN help Say Y here if you want PCIe MSI support for the Altera FPGA. This MSI driver supports Altera MSI to GIC controller IP. @@ -204,6 +220,7 @@ config PCIE_ALTERA_MSI config PCI_HISI depends on OF && ARM64 bool "HiSilicon Hip05 and Hip06 SoCs PCIe controllers" + depends on PCI_MSI_IRQ_DOMAIN select PCIEPORTBUS select PCIE_DW help @@ -213,6 +230,7 @@ config PCI_HISI config PCIE_QCOM bool "Qualcomm PCIe controller" depends on ARCH_QCOM && OF + depends on PCI_MSI_IRQ_DOMAIN select PCIE_DW select PCIEPORTBUS help @@ -237,6 +255,7 @@ config PCI_HOST_THUNDER_ECAM config PCIE_ARMADA_8K bool "Marvell Armada-8K PCIe controller" depends on ARCH_MVEBU + depends on PCI_MSI_IRQ_DOMAIN select PCIE_DW select PCIEPORTBUS help @@ -245,4 +264,14 @@ config PCIE_ARMADA_8K Designware hardware and therefore the driver re-uses the Designware core functions to implement the driver. +config PCIE_ARTPEC6 + bool "Axis ARTPEC-6 PCIe controller" + depends on MACH_ARTPEC6 + depends on PCI_MSI_IRQ_DOMAIN + select PCIE_DW + select PCIEPORTBUS + help + Say Y here to enable PCIe controller support on Axis ARTPEC-6 + SoCs. This PCIe controller uses the DesignWare core. + endmenu diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile index 9c8698e89e96..88434101e4c4 100644 --- a/drivers/pci/host/Makefile +++ b/drivers/pci/host/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o obj-$(CONFIG_PCI_IMX6) += pci-imx6.o obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o +obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o obj-$(CONFIG_PCI_RCAR_GEN2) += pci-rcar-gen2.o obj-$(CONFIG_PCIE_RCAR) += pcie-rcar.o @@ -29,3 +30,4 @@ obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o +obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c new file mode 100644 index 000000000000..ef9893fa3176 --- /dev/null +++ b/drivers/pci/host/pci-aardvark.c @@ -0,0 +1,1001 @@ +/* + * Driver for the Aardvark PCIe controller, used on Marvell Armada + * 3700. + * + * Copyright (C) 2016 Marvell + * + * Author: Hezi Shahmoon <hezi.shahmoon@marvell.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/of_address.h> +#include <linux/of_pci.h> + +/* PCIe core registers */ +#define PCIE_CORE_CMD_STATUS_REG 0x4 +#define PCIE_CORE_CMD_IO_ACCESS_EN BIT(0) +#define PCIE_CORE_CMD_MEM_ACCESS_EN BIT(1) +#define PCIE_CORE_CMD_MEM_IO_REQ_EN BIT(2) +#define PCIE_CORE_DEV_CTRL_STATS_REG 0xc8 +#define PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE (0 << 4) +#define PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT 5 +#define PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE (0 << 11) +#define PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT 12 +#define PCIE_CORE_LINK_CTRL_STAT_REG 0xd0 +#define PCIE_CORE_LINK_L0S_ENTRY BIT(0) +#define PCIE_CORE_LINK_TRAINING BIT(5) +#define PCIE_CORE_LINK_WIDTH_SHIFT 20 +#define PCIE_CORE_ERR_CAPCTL_REG 0x118 +#define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX BIT(5) +#define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN BIT(6) +#define PCIE_CORE_ERR_CAPCTL_ECRC_CHCK BIT(7) +#define PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV BIT(8) + +/* PIO registers base address and register offsets */ +#define PIO_BASE_ADDR 0x4000 +#define PIO_CTRL (PIO_BASE_ADDR + 0x0) +#define PIO_CTRL_TYPE_MASK GENMASK(3, 0) +#define PIO_CTRL_ADDR_WIN_DISABLE BIT(24) +#define PIO_STAT (PIO_BASE_ADDR + 0x4) +#define PIO_COMPLETION_STATUS_SHIFT 7 +#define PIO_COMPLETION_STATUS_MASK GENMASK(9, 7) +#define PIO_COMPLETION_STATUS_OK 0 +#define PIO_COMPLETION_STATUS_UR 1 +#define PIO_COMPLETION_STATUS_CRS 2 +#define PIO_COMPLETION_STATUS_CA 4 +#define PIO_NON_POSTED_REQ BIT(0) +#define PIO_ADDR_LS (PIO_BASE_ADDR + 0x8) +#define PIO_ADDR_MS (PIO_BASE_ADDR + 0xc) +#define PIO_WR_DATA (PIO_BASE_ADDR + 0x10) +#define PIO_WR_DATA_STRB (PIO_BASE_ADDR + 0x14) +#define PIO_RD_DATA (PIO_BASE_ADDR + 0x18) +#define PIO_START (PIO_BASE_ADDR + 0x1c) +#define PIO_ISR (PIO_BASE_ADDR + 0x20) +#define PIO_ISRM (PIO_BASE_ADDR + 0x24) + +/* Aardvark Control registers */ +#define CONTROL_BASE_ADDR 0x4800 +#define PCIE_CORE_CTRL0_REG (CONTROL_BASE_ADDR + 0x0) +#define PCIE_GEN_SEL_MSK 0x3 +#define PCIE_GEN_SEL_SHIFT 0x0 +#define SPEED_GEN_1 0 +#define SPEED_GEN_2 1 +#define SPEED_GEN_3 2 +#define IS_RC_MSK 1 +#define IS_RC_SHIFT 2 +#define LANE_CNT_MSK 0x18 +#define LANE_CNT_SHIFT 0x3 +#define LANE_COUNT_1 (0 << LANE_CNT_SHIFT) +#define LANE_COUNT_2 (1 << LANE_CNT_SHIFT) +#define LANE_COUNT_4 (2 << LANE_CNT_SHIFT) +#define LANE_COUNT_8 (3 << LANE_CNT_SHIFT) +#define LINK_TRAINING_EN BIT(6) +#define LEGACY_INTA BIT(28) +#define LEGACY_INTB BIT(29) +#define LEGACY_INTC BIT(30) +#define LEGACY_INTD BIT(31) +#define PCIE_CORE_CTRL1_REG (CONTROL_BASE_ADDR + 0x4) +#define HOT_RESET_GEN BIT(0) +#define PCIE_CORE_CTRL2_REG (CONTROL_BASE_ADDR + 0x8) +#define PCIE_CORE_CTRL2_RESERVED 0x7 +#define PCIE_CORE_CTRL2_TD_ENABLE BIT(4) +#define PCIE_CORE_CTRL2_STRICT_ORDER_ENABLE BIT(5) +#define PCIE_CORE_CTRL2_OB_WIN_ENABLE BIT(6) +#define PCIE_CORE_CTRL2_MSI_ENABLE BIT(10) +#define PCIE_ISR0_REG (CONTROL_BASE_ADDR + 0x40) +#define PCIE_ISR0_MASK_REG (CONTROL_BASE_ADDR + 0x44) +#define PCIE_ISR0_MSI_INT_PENDING BIT(24) +#define PCIE_ISR0_INTX_ASSERT(val) BIT(16 + (val)) +#define PCIE_ISR0_INTX_DEASSERT(val) BIT(20 + (val)) +#define PCIE_ISR0_ALL_MASK GENMASK(26, 0) +#define PCIE_ISR1_REG (CONTROL_BASE_ADDR + 0x48) +#define PCIE_ISR1_MASK_REG (CONTROL_BASE_ADDR + 0x4C) +#define PCIE_ISR1_POWER_STATE_CHANGE BIT(4) +#define PCIE_ISR1_FLUSH BIT(5) +#define PCIE_ISR1_ALL_MASK GENMASK(5, 4) +#define PCIE_MSI_ADDR_LOW_REG (CONTROL_BASE_ADDR + 0x50) +#define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54) +#define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58) +#define PCIE_MSI_MASK_REG (CONTROL_BASE_ADDR + 0x5C) +#define PCIE_MSI_PAYLOAD_REG (CONTROL_BASE_ADDR + 0x9C) + +/* PCIe window configuration */ +#define OB_WIN_BASE_ADDR 0x4c00 +#define OB_WIN_BLOCK_SIZE 0x20 +#define OB_WIN_REG_ADDR(win, offset) (OB_WIN_BASE_ADDR + \ + OB_WIN_BLOCK_SIZE * (win) + \ + (offset)) +#define OB_WIN_MATCH_LS(win) OB_WIN_REG_ADDR(win, 0x00) +#define OB_WIN_MATCH_MS(win) OB_WIN_REG_ADDR(win, 0x04) +#define OB_WIN_REMAP_LS(win) OB_WIN_REG_ADDR(win, 0x08) +#define OB_WIN_REMAP_MS(win) OB_WIN_REG_ADDR(win, 0x0c) +#define OB_WIN_MASK_LS(win) OB_WIN_REG_ADDR(win, 0x10) +#define OB_WIN_MASK_MS(win) OB_WIN_REG_ADDR(win, 0x14) +#define OB_WIN_ACTIONS(win) OB_WIN_REG_ADDR(win, 0x18) + +/* PCIe window types */ +#define OB_PCIE_MEM 0x0 +#define OB_PCIE_IO 0x4 + +/* LMI registers base address and register offsets */ +#define LMI_BASE_ADDR 0x6000 +#define CFG_REG (LMI_BASE_ADDR + 0x0) +#define LTSSM_SHIFT 24 +#define LTSSM_MASK 0x3f +#define LTSSM_L0 0x10 +#define RC_BAR_CONFIG 0x300 + +/* PCIe core controller registers */ +#define CTRL_CORE_BASE_ADDR 0x18000 +#define CTRL_CONFIG_REG (CTRL_CORE_BASE_ADDR + 0x0) +#define CTRL_MODE_SHIFT 0x0 +#define CTRL_MODE_MASK 0x1 +#define PCIE_CORE_MODE_DIRECT 0x0 +#define PCIE_CORE_MODE_COMMAND 0x1 + +/* PCIe Central Interrupts Registers */ +#define CENTRAL_INT_BASE_ADDR 0x1b000 +#define HOST_CTRL_INT_STATUS_REG (CENTRAL_INT_BASE_ADDR + 0x0) +#define HOST_CTRL_INT_MASK_REG (CENTRAL_INT_BASE_ADDR + 0x4) +#define PCIE_IRQ_CMDQ_INT BIT(0) +#define PCIE_IRQ_MSI_STATUS_INT BIT(1) +#define PCIE_IRQ_CMD_SENT_DONE BIT(3) +#define PCIE_IRQ_DMA_INT BIT(4) +#define PCIE_IRQ_IB_DXFERDONE BIT(5) +#define PCIE_IRQ_OB_DXFERDONE BIT(6) +#define PCIE_IRQ_OB_RXFERDONE BIT(7) +#define PCIE_IRQ_COMPQ_INT BIT(12) +#define PCIE_IRQ_DIR_RD_DDR_DET BIT(13) +#define PCIE_IRQ_DIR_WR_DDR_DET BIT(14) +#define PCIE_IRQ_CORE_INT BIT(16) +#define PCIE_IRQ_CORE_INT_PIO BIT(17) +#define PCIE_IRQ_DPMU_INT BIT(18) +#define PCIE_IRQ_PCIE_MIS_INT BIT(19) +#define PCIE_IRQ_MSI_INT1_DET BIT(20) +#define PCIE_IRQ_MSI_INT2_DET BIT(21) +#define PCIE_IRQ_RC_DBELL_DET BIT(22) +#define PCIE_IRQ_EP_STATUS BIT(23) +#define PCIE_IRQ_ALL_MASK 0xfff0fb +#define PCIE_IRQ_ENABLE_INTS_MASK PCIE_IRQ_CORE_INT + +/* Transaction types */ +#define PCIE_CONFIG_RD_TYPE0 0x8 +#define PCIE_CONFIG_RD_TYPE1 0x9 +#define PCIE_CONFIG_WR_TYPE0 0xa +#define PCIE_CONFIG_WR_TYPE1 0xb + +/* PCI_BDF shifts 8bit, so we need extra 4bit shift */ +#define PCIE_BDF(dev) (dev << 4) +#define PCIE_CONF_BUS(bus) (((bus) & 0xff) << 20) +#define PCIE_CONF_DEV(dev) (((dev) & 0x1f) << 15) +#define PCIE_CONF_FUNC(fun) (((fun) & 0x7) << 12) +#define PCIE_CONF_REG(reg) ((reg) & 0xffc) +#define PCIE_CONF_ADDR(bus, devfn, where) \ + (PCIE_CONF_BUS(bus) | PCIE_CONF_DEV(PCI_SLOT(devfn)) | \ + PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where)) + +#define PIO_TIMEOUT_MS 1 + +#define LINK_WAIT_MAX_RETRIES 10 +#define LINK_WAIT_USLEEP_MIN 90000 +#define LINK_WAIT_USLEEP_MAX 100000 + +#define LEGACY_IRQ_NUM 4 +#define MSI_IRQ_NUM 32 + +struct advk_pcie { + struct platform_device *pdev; + void __iomem *base; + struct list_head resources; + struct irq_domain *irq_domain; + struct irq_chip irq_chip; + struct msi_controller msi; + struct irq_domain *msi_domain; + struct irq_chip msi_irq_chip; + DECLARE_BITMAP(msi_irq_in_use, MSI_IRQ_NUM); + struct mutex msi_used_lock; + u16 msi_msg; + int root_bus_nr; +}; + +static inline void advk_writel(struct advk_pcie *pcie, u32 val, u64 reg) +{ + writel(val, pcie->base + reg); +} + +static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg) +{ + return readl(pcie->base + reg); +} + +static int advk_pcie_link_up(struct advk_pcie *pcie) +{ + u32 val, ltssm_state; + + val = advk_readl(pcie, CFG_REG); + ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK; + return ltssm_state >= LTSSM_L0; +} + +static int advk_pcie_wait_for_link(struct advk_pcie *pcie) +{ + int retries; + + /* check if the link is up or not */ + for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { + if (advk_pcie_link_up(pcie)) { + dev_info(&pcie->pdev->dev, "link up\n"); + return 0; + } + + usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); + } + + dev_err(&pcie->pdev->dev, "link never came up\n"); + + return -ETIMEDOUT; +} + +/* + * Set PCIe address window register which could be used for memory + * mapping. + */ +static void advk_pcie_set_ob_win(struct advk_pcie *pcie, + u32 win_num, u32 match_ms, + u32 match_ls, u32 mask_ms, + u32 mask_ls, u32 remap_ms, + u32 remap_ls, u32 action) +{ + advk_writel(pcie, match_ls, OB_WIN_MATCH_LS(win_num)); + advk_writel(pcie, match_ms, OB_WIN_MATCH_MS(win_num)); + advk_writel(pcie, mask_ms, OB_WIN_MASK_MS(win_num)); + advk_writel(pcie, mask_ls, OB_WIN_MASK_LS(win_num)); + advk_writel(pcie, remap_ms, OB_WIN_REMAP_MS(win_num)); + advk_writel(pcie, remap_ls, OB_WIN_REMAP_LS(win_num)); + advk_writel(pcie, action, OB_WIN_ACTIONS(win_num)); + advk_writel(pcie, match_ls | BIT(0), OB_WIN_MATCH_LS(win_num)); +} + +static void advk_pcie_setup_hw(struct advk_pcie *pcie) +{ + u32 reg; + int i; + + /* Point PCIe unit MBUS decode windows to DRAM space */ + for (i = 0; i < 8; i++) + advk_pcie_set_ob_win(pcie, i, 0, 0, 0, 0, 0, 0, 0); + + /* Set to Direct mode */ + reg = advk_readl(pcie, CTRL_CONFIG_REG); + reg &= ~(CTRL_MODE_MASK << CTRL_MODE_SHIFT); + reg |= ((PCIE_CORE_MODE_DIRECT & CTRL_MODE_MASK) << CTRL_MODE_SHIFT); + advk_writel(pcie, reg, CTRL_CONFIG_REG); + + /* Set PCI global control register to RC mode */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg |= (IS_RC_MSK << IS_RC_SHIFT); + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* Set Advanced Error Capabilities and Control PF0 register */ + reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX | + PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN | + PCIE_CORE_ERR_CAPCTL_ECRC_CHCK | + PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV; + advk_writel(pcie, reg, PCIE_CORE_ERR_CAPCTL_REG); + + /* Set PCIe Device Control and Status 1 PF0 register */ + reg = PCIE_CORE_DEV_CTRL_STATS_RELAX_ORDER_DISABLE | + (7 << PCIE_CORE_DEV_CTRL_STATS_MAX_PAYLOAD_SZ_SHIFT) | + PCIE_CORE_DEV_CTRL_STATS_SNOOP_DISABLE | + PCIE_CORE_DEV_CTRL_STATS_MAX_RD_REQ_SIZE_SHIFT; + advk_writel(pcie, reg, PCIE_CORE_DEV_CTRL_STATS_REG); + + /* Program PCIe Control 2 to disable strict ordering */ + reg = PCIE_CORE_CTRL2_RESERVED | + PCIE_CORE_CTRL2_TD_ENABLE; + advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); + + /* Set GEN2 */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg &= ~PCIE_GEN_SEL_MSK; + reg |= SPEED_GEN_2; + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* Set lane X1 */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg &= ~LANE_CNT_MSK; + reg |= LANE_COUNT_1; + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* Enable link training */ + reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); + reg |= LINK_TRAINING_EN; + advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + + /* Enable MSI */ + reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG); + reg |= PCIE_CORE_CTRL2_MSI_ENABLE; + advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); + + /* Clear all interrupts */ + advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG); + advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG); + advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG); + + /* Disable All ISR0/1 Sources */ + reg = PCIE_ISR0_ALL_MASK; + reg &= ~PCIE_ISR0_MSI_INT_PENDING; + advk_writel(pcie, reg, PCIE_ISR0_MASK_REG); + + advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG); + + /* Unmask all MSI's */ + advk_writel(pcie, 0, PCIE_MSI_MASK_REG); + + /* Enable summary interrupt for GIC SPI source */ + reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK); + advk_writel(pcie, reg, HOST_CTRL_INT_MASK_REG); + + reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG); + reg |= PCIE_CORE_CTRL2_OB_WIN_ENABLE; + advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG); + + /* Bypass the address window mapping for PIO */ + reg = advk_readl(pcie, PIO_CTRL); + reg |= PIO_CTRL_ADDR_WIN_DISABLE; + advk_writel(pcie, reg, PIO_CTRL); + + /* Start link training */ + reg = advk_readl(pcie, PCIE_CORE_LINK_CTRL_STAT_REG); + reg |= PCIE_CORE_LINK_TRAINING; + advk_writel(pcie, reg, PCIE_CORE_LINK_CTRL_STAT_REG); + + advk_pcie_wait_for_link(pcie); + + reg = PCIE_CORE_LINK_L0S_ENTRY | + (1 << PCIE_CORE_LINK_WIDTH_SHIFT); + advk_writel(pcie, reg, PCIE_CORE_LINK_CTRL_STAT_REG); + + reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); + reg |= PCIE_CORE_CMD_MEM_ACCESS_EN | + PCIE_CORE_CMD_IO_ACCESS_EN | + PCIE_CORE_CMD_MEM_IO_REQ_EN; + advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); +} + +static void advk_pcie_check_pio_status(struct advk_pcie *pcie) +{ + u32 reg; + unsigned int status; + char *strcomp_status, *str_posted; + + reg = advk_readl(pcie, PIO_STAT); + status = (reg & PIO_COMPLETION_STATUS_MASK) >> + PIO_COMPLETION_STATUS_SHIFT; + + if (!status) + return; + + switch (status) { + case PIO_COMPLETION_STATUS_UR: + strcomp_status = "UR"; + break; + case PIO_COMPLETION_STATUS_CRS: + strcomp_status = "CRS"; + break; + case PIO_COMPLETION_STATUS_CA: + strcomp_status = "CA"; + break; + default: + strcomp_status = "Unknown"; + break; + } + + if (reg & PIO_NON_POSTED_REQ) + str_posted = "Non-posted"; + else + str_posted = "Posted"; + + dev_err(&pcie->pdev->dev, "%s PIO Response Status: %s, %#x @ %#x\n", + str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS)); +} + +static int advk_pcie_wait_pio(struct advk_pcie *pcie) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(PIO_TIMEOUT_MS); + + while (time_before(jiffies, timeout)) { + u32 start, isr; + + start = advk_readl(pcie, PIO_START); + isr = advk_readl(pcie, PIO_ISR); + if (!start && isr) + return 0; + } + + dev_err(&pcie->pdev->dev, "config read/write timed out\n"); + return -ETIMEDOUT; +} + +static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, + int where, int size, u32 *val) +{ + struct advk_pcie *pcie = bus->sysdata; + u32 reg; + int ret; + + if (PCI_SLOT(devfn) != 0) { + *val = 0xffffffff; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* Start PIO */ + advk_writel(pcie, 0, PIO_START); + advk_writel(pcie, 1, PIO_ISR); + + /* Program the control register */ + reg = advk_readl(pcie, PIO_CTRL); + reg &= ~PIO_CTRL_TYPE_MASK; + if (bus->number == pcie->root_bus_nr) + reg |= PCIE_CONFIG_RD_TYPE0; + else + reg |= PCIE_CONFIG_RD_TYPE1; + advk_writel(pcie, reg, PIO_CTRL); + + /* Program the address registers */ + reg = PCIE_BDF(devfn) | PCIE_CONF_REG(where); + advk_writel(pcie, reg, PIO_ADDR_LS); + advk_writel(pcie, 0, PIO_ADDR_MS); + + /* Program the data strobe */ + advk_writel(pcie, 0xf, PIO_WR_DATA_STRB); + + /* Start the transfer */ + advk_writel(pcie, 1, PIO_START); + + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + return PCIBIOS_SET_FAILED; + + advk_pcie_check_pio_status(pcie); + + /* Get the read result */ + *val = advk_readl(pcie, PIO_RD_DATA); + if (size == 1) + *val = (*val >> (8 * (where & 3))) & 0xff; + else if (size == 2) + *val = (*val >> (8 * (where & 3))) & 0xffff; + + return PCIBIOS_SUCCESSFUL; +} + +static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, + int where, int size, u32 val) +{ + struct advk_pcie *pcie = bus->sysdata; + u32 reg; + u32 data_strobe = 0x0; + int offset; + int ret; + + if (PCI_SLOT(devfn) != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (where % size) + return PCIBIOS_SET_FAILED; + + /* Start PIO */ + advk_writel(pcie, 0, PIO_START); + advk_writel(pcie, 1, PIO_ISR); + + /* Program the control register */ + reg = advk_readl(pcie, PIO_CTRL); + reg &= ~PIO_CTRL_TYPE_MASK; + if (bus->number == pcie->root_bus_nr) + reg |= PCIE_CONFIG_WR_TYPE0; + else + reg |= PCIE_CONFIG_WR_TYPE1; + advk_writel(pcie, reg, PIO_CTRL); + + /* Program the address registers */ + reg = PCIE_CONF_ADDR(bus->number, devfn, where); + advk_writel(pcie, reg, PIO_ADDR_LS); + advk_writel(pcie, 0, PIO_ADDR_MS); + + /* Calculate the write strobe */ + offset = where & 0x3; + reg = val << (8 * offset); + data_strobe = GENMASK(size - 1, 0) << offset; + + /* Program the data register */ + advk_writel(pcie, reg, PIO_WR_DATA); + + /* Program the data strobe */ + advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB); + + /* Start the transfer */ + advk_writel(pcie, 1, PIO_START); + + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + return PCIBIOS_SET_FAILED; + + advk_pcie_check_pio_status(pcie); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops advk_pcie_ops = { + .read = advk_pcie_rd_conf, + .write = advk_pcie_wr_conf, +}; + +static int advk_pcie_alloc_msi(struct advk_pcie *pcie) +{ + int hwirq; + + mutex_lock(&pcie->msi_used_lock); + hwirq = find_first_zero_bit(pcie->msi_irq_in_use, MSI_IRQ_NUM); + if (hwirq >= MSI_IRQ_NUM) + hwirq = -ENOSPC; + else + set_bit(hwirq, pcie->msi_irq_in_use); + mutex_unlock(&pcie->msi_used_lock); + + return hwirq; +} + +static void advk_pcie_free_msi(struct advk_pcie *pcie, int hwirq) +{ + mutex_lock(&pcie->msi_used_lock); + if (!test_bit(hwirq, pcie->msi_irq_in_use)) + dev_err(&pcie->pdev->dev, "trying to free unused MSI#%d\n", + hwirq); + else + clear_bit(hwirq, pcie->msi_irq_in_use); + mutex_unlock(&pcie->msi_used_lock); +} + +static int advk_pcie_setup_msi_irq(struct msi_controller *chip, + struct pci_dev *pdev, + struct msi_desc *desc) +{ + struct advk_pcie *pcie = pdev->bus->sysdata; + struct msi_msg msg; + int virq, hwirq; + phys_addr_t msi_msg_phys; + + /* We support MSI, but not MSI-X */ + if (desc->msi_attrib.is_msix) + return -EINVAL; + + hwirq = advk_pcie_alloc_msi(pcie); + if (hwirq < 0) + return hwirq; + + virq = irq_create_mapping(pcie->msi_domain, hwirq); + if (!virq) { + advk_pcie_free_msi(pcie, hwirq); + return -EINVAL; + } + + irq_set_msi_desc(virq, desc); + + msi_msg_phys = virt_to_phys(&pcie->msi_msg); + + msg.address_lo = lower_32_bits(msi_msg_phys); + msg.address_hi = upper_32_bits(msi_msg_phys); + msg.data = virq; + + pci_write_msi_msg(virq, &msg); + + return 0; +} + +static void advk_pcie_teardown_msi_irq(struct msi_controller *chip, + unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + struct msi_desc *msi = irq_data_get_msi_desc(d); + struct advk_pcie *pcie = msi_desc_to_pci_sysdata(msi); + unsigned long hwirq = d->hwirq; + + irq_dispose_mapping(irq); + advk_pcie_free_msi(pcie, hwirq); +} + +static int advk_pcie_msi_map(struct irq_domain *domain, + unsigned int virq, irq_hw_number_t hw) +{ + struct advk_pcie *pcie = domain->host_data; + + irq_set_chip_and_handler(virq, &pcie->msi_irq_chip, + handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops advk_pcie_msi_irq_ops = { + .map = advk_pcie_msi_map, +}; + +static void advk_pcie_irq_mask(struct irq_data *d) +{ + struct advk_pcie *pcie = d->domain->host_data; + irq_hw_number_t hwirq = irqd_to_hwirq(d); + u32 mask; + + mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); + mask |= PCIE_ISR0_INTX_ASSERT(hwirq); + advk_writel(pcie, mask, PCIE_ISR0_MASK_REG); +} + +static void advk_pcie_irq_unmask(struct irq_data *d) +{ + struct advk_pcie *pcie = d->domain->host_data; + irq_hw_number_t hwirq = irqd_to_hwirq(d); + u32 mask; + + mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); + mask &= ~PCIE_ISR0_INTX_ASSERT(hwirq); + advk_writel(pcie, mask, PCIE_ISR0_MASK_REG); +} + +static int advk_pcie_irq_map(struct irq_domain *h, + unsigned int virq, irq_hw_number_t hwirq) +{ + struct advk_pcie *pcie = h->host_data; + + advk_pcie_irq_mask(irq_get_irq_data(virq)); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &pcie->irq_chip, + handle_level_irq); + irq_set_chip_data(virq, pcie); + + return 0; +} + +static const struct irq_domain_ops advk_pcie_irq_domain_ops = { + .map = advk_pcie_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie) +{ + struct device *dev = &pcie->pdev->dev; + struct device_node *node = dev->of_node; + struct irq_chip *msi_irq_chip; + struct msi_controller *msi; + phys_addr_t msi_msg_phys; + int ret; + + msi_irq_chip = &pcie->msi_irq_chip; + + msi_irq_chip->name = devm_kasprintf(dev, GFP_KERNEL, "%s-msi", + dev_name(dev)); + if (!msi_irq_chip->name) + return -ENOMEM; + + msi_irq_chip->irq_enable = pci_msi_unmask_irq; + msi_irq_chip->irq_disable = pci_msi_mask_irq; + msi_irq_chip->irq_mask = pci_msi_mask_irq; + msi_irq_chip->irq_unmask = pci_msi_unmask_irq; + + msi = &pcie->msi; + + msi->setup_irq = advk_pcie_setup_msi_irq; + msi->teardown_irq = advk_pcie_teardown_msi_irq; + msi->of_node = node; + + mutex_init(&pcie->msi_used_lock); + + msi_msg_phys = virt_to_phys(&pcie->msi_msg); + + advk_writel(pcie, lower_32_bits(msi_msg_phys), + PCIE_MSI_ADDR_LOW_REG); + advk_writel(pcie, upper_32_bits(msi_msg_phys), + PCIE_MSI_ADDR_HIGH_REG); + + pcie->msi_domain = + irq_domain_add_linear(NULL, MSI_IRQ_NUM, + &advk_pcie_msi_irq_ops, pcie); + if (!pcie->msi_domain) + return -ENOMEM; + + ret = of_pci_msi_chip_add(msi); + if (ret < 0) { + irq_domain_remove(pcie->msi_domain); + return ret; + } + + return 0; +} + +static void advk_pcie_remove_msi_irq_domain(struct advk_pcie *pcie) +{ + of_pci_msi_chip_remove(&pcie->msi); + irq_domain_remove(pcie->msi_domain); +} + +static int advk_pcie_init_irq_domain(struct advk_pcie *pcie) +{ + struct device *dev = &pcie->pdev->dev; + struct device_node *node = dev->of_node; + struct device_node *pcie_intc_node; + struct irq_chip *irq_chip; + + pcie_intc_node = of_get_next_child(node, NULL); + if (!pcie_intc_node) { + dev_err(dev, "No PCIe Intc node found\n"); + return -ENODEV; + } + + irq_chip = &pcie->irq_chip; + + irq_chip->name = devm_kasprintf(dev, GFP_KERNEL, "%s-irq", + dev_name(dev)); + if (!irq_chip->name) { + of_node_put(pcie_intc_node); + return -ENOMEM; + } + + irq_chip->irq_mask = advk_pcie_irq_mask; + irq_chip->irq_mask_ack = advk_pcie_irq_mask; + irq_chip->irq_unmask = advk_pcie_irq_unmask; + + pcie->irq_domain = + irq_domain_add_linear(pcie_intc_node, LEGACY_IRQ_NUM, + &advk_pcie_irq_domain_ops, pcie); + if (!pcie->irq_domain) { + dev_err(dev, "Failed to get a INTx IRQ domain\n"); + of_node_put(pcie_intc_node); + return -ENOMEM; + } + + return 0; +} + +static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie) +{ + irq_domain_remove(pcie->irq_domain); +} + +static void advk_pcie_handle_msi(struct advk_pcie *pcie) +{ + u32 msi_val, msi_mask, msi_status, msi_idx; + u16 msi_data; + + msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG); + msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG); + msi_status = msi_val & ~msi_mask; + + for (msi_idx = 0; msi_idx < MSI_IRQ_NUM; msi_idx++) { + if (!(BIT(msi_idx) & msi_status)) + continue; + + advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG); + msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & 0xFF; + generic_handle_irq(msi_data); + } + + advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING, + PCIE_ISR0_REG); +} + +static void advk_pcie_handle_int(struct advk_pcie *pcie) +{ + u32 val, mask, status; + int i, virq; + + val = advk_readl(pcie, PCIE_ISR0_REG); + mask = advk_readl(pcie, PCIE_ISR0_MASK_REG); + status = val & ((~mask) & PCIE_ISR0_ALL_MASK); + + if (!status) { + advk_writel(pcie, val, PCIE_ISR0_REG); + return; + } + + /* Process MSI interrupts */ + if (status & PCIE_ISR0_MSI_INT_PENDING) + advk_pcie_handle_msi(pcie); + + /* Process legacy interrupts */ + for (i = 0; i < LEGACY_IRQ_NUM; i++) { + if (!(status & PCIE_ISR0_INTX_ASSERT(i))) + continue; + + advk_writel(pcie, PCIE_ISR0_INTX_ASSERT(i), + PCIE_ISR0_REG); + + virq = irq_find_mapping(pcie->irq_domain, i); + generic_handle_irq(virq); + } +} + +static irqreturn_t advk_pcie_irq_handler(int irq, void *arg) +{ + struct advk_pcie *pcie = arg; + u32 status; + + status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG); + if (!(status & PCIE_IRQ_CORE_INT)) + return IRQ_NONE; + + advk_pcie_handle_int(pcie); + + /* Clear interrupt */ + advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG); + + return IRQ_HANDLED; +} + +static int advk_pcie_parse_request_of_pci_ranges(struct advk_pcie *pcie) +{ + int err, res_valid = 0; + struct device *dev = &pcie->pdev->dev; + struct device_node *np = dev->of_node; + struct resource_entry *win; + resource_size_t iobase; + + INIT_LIST_HEAD(&pcie->resources); + + err = of_pci_get_host_bridge_resources(np, 0, 0xff, &pcie->resources, + &iobase); + if (err) + return err; + + err = devm_request_pci_bus_resources(dev, &pcie->resources); + if (err) + goto out_release_res; + + resource_list_for_each_entry(win, &pcie->resources) { + struct resource *res = win->res; + + switch (resource_type(res)) { + case IORESOURCE_IO: + advk_pcie_set_ob_win(pcie, 1, + upper_32_bits(res->start), + lower_32_bits(res->start), + 0, 0xF8000000, 0, + lower_32_bits(res->start), + OB_PCIE_IO); + err = pci_remap_iospace(res, iobase); + if (err) + dev_warn(dev, "error %d: failed to map resource %pR\n", + err, res); + break; + case IORESOURCE_MEM: + advk_pcie_set_ob_win(pcie, 0, + upper_32_bits(res->start), + lower_32_bits(res->start), + 0x0, 0xF8000000, 0, + lower_32_bits(res->start), + (2 << 20) | OB_PCIE_MEM); + res_valid |= !(res->flags & IORESOURCE_PREFETCH); + break; + case IORESOURCE_BUS: + pcie->root_bus_nr = res->start; + break; + } + } + + if (!res_valid) { + dev_err(dev, "non-prefetchable memory resource required\n"); + err = -EINVAL; + goto out_release_res; + } + + return 0; + +out_release_res: + pci_free_resource_list(&pcie->resources); + return err; +} + +static int advk_pcie_probe(struct platform_device *pdev) +{ + struct advk_pcie *pcie; + struct resource *res; + struct pci_bus *bus, *child; + struct msi_controller *msi; + struct device_node *msi_node; + int ret, irq; + + pcie = devm_kzalloc(&pdev->dev, sizeof(struct advk_pcie), + GFP_KERNEL); + if (!pcie) + return -ENOMEM; + + pcie->pdev = pdev; + platform_set_drvdata(pdev, pcie); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pcie->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pcie->base)) { + dev_err(&pdev->dev, "Failed to map registers\n"); + return PTR_ERR(pcie->base); + } + + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, advk_pcie_irq_handler, + IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie", + pcie); + if (ret) { + dev_err(&pdev->dev, "Failed to register interrupt\n"); + return ret; + } + + ret = advk_pcie_parse_request_of_pci_ranges(pcie); + if (ret) { + dev_err(&pdev->dev, "Failed to parse resources\n"); + return ret; + } + + advk_pcie_setup_hw(pcie); + + ret = advk_pcie_init_irq_domain(pcie); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize irq\n"); + return ret; + } + + ret = advk_pcie_init_msi_irq_domain(pcie); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize irq\n"); + advk_pcie_remove_irq_domain(pcie); + return ret; + } + + msi_node = of_parse_phandle(pdev->dev.of_node, "msi-parent", 0); + if (msi_node) + msi = of_pci_find_msi_chip_by_node(msi_node); + else + msi = NULL; + + bus = pci_scan_root_bus_msi(&pdev->dev, 0, &advk_pcie_ops, + pcie, &pcie->resources, &pcie->msi); + if (!bus) { + advk_pcie_remove_msi_irq_domain(pcie); + advk_pcie_remove_irq_domain(pcie); + return -ENOMEM; + } + + pci_bus_assign_resources(bus); + + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + + pci_bus_add_devices(bus); + + return 0; +} + +static const struct of_device_id advk_pcie_of_match_table[] = { + { .compatible = "marvell,armada-3700-pcie", }, + {}, +}; + +static struct platform_driver advk_pcie_driver = { + .driver = { + .name = "advk-pcie", + .of_match_table = advk_pcie_of_match_table, + /* Driver unloading/unbinding currently not supported */ + .suppress_bind_attrs = true, + }, + .probe = advk_pcie_probe, +}; +builtin_platform_driver(advk_pcie_driver); diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c index f441130407e7..81b3949a26db 100644 --- a/drivers/pci/host/pci-dra7xx.c +++ b/drivers/pci/host/pci-dra7xx.c @@ -181,14 +181,14 @@ static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp) if (!pcie_intc_node) { dev_err(dev, "No PCIe Intc node found\n"); - return PTR_ERR(pcie_intc_node); + return -ENODEV; } pp->irq_domain = irq_domain_add_linear(pcie_intc_node, 4, &intx_domain_ops, pp); if (!pp->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); - return PTR_ERR(pp->irq_domain); + return -ENODEV; } return 0; diff --git a/drivers/pci/host/pci-host-common.c b/drivers/pci/host/pci-host-common.c index 8cba7ab73df9..9d9d34e959b6 100644 --- a/drivers/pci/host/pci-host-common.c +++ b/drivers/pci/host/pci-host-common.c @@ -20,10 +20,9 @@ #include <linux/module.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-ecam.h> #include <linux/platform_device.h> -#include "../ecam.h" - static int gen_pci_parse_request_of_pci_ranges(struct device *dev, struct list_head *resources, struct resource **bus_range) { @@ -36,44 +35,34 @@ static int gen_pci_parse_request_of_pci_ranges(struct device *dev, if (err) return err; + err = devm_request_pci_bus_resources(dev, resources); + if (err) + return err; + resource_list_for_each_entry(win, resources) { - struct resource *parent, *res = win->res; + struct resource *res = win->res; switch (resource_type(res)) { case IORESOURCE_IO: - parent = &ioport_resource; err = pci_remap_iospace(res, iobase); - if (err) { + if (err) dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); - continue; - } break; case IORESOURCE_MEM: - parent = &iomem_resource; res_valid |= !(res->flags & IORESOURCE_PREFETCH); break; case IORESOURCE_BUS: *bus_range = res; - default: - continue; + break; } - - err = devm_request_resource(dev, parent, res); - if (err) - goto out_release_res; - } - - if (!res_valid) { - dev_err(dev, "non-prefetchable memory resource required\n"); - err = -EINVAL; - goto out_release_res; } - return 0; + if (res_valid) + return 0; -out_release_res: - return err; + dev_err(dev, "non-prefetchable memory resource required\n"); + return -EINVAL; } static void gen_pci_unmap_cfg(void *ptr) @@ -155,7 +144,14 @@ int pci_host_common_probe(struct platform_device *pdev, pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci); - if (!pci_has_flag(PCI_PROBE_ONLY)) { + /* + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). + */ + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); diff --git a/drivers/pci/host/pci-host-generic.c b/drivers/pci/host/pci-host-generic.c index 6eaceab1bf04..c05ea9d72f69 100644 --- a/drivers/pci/host/pci-host-generic.c +++ b/drivers/pci/host/pci-host-generic.c @@ -20,13 +20,12 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-ecam.h> #include <linux/platform_device.h> -#include "../ecam.h" - static struct pci_ecam_ops gen_pci_cfg_cam_bus_ops = { .bus_shift = 16, .pci_ops = { @@ -46,8 +45,6 @@ static const struct of_device_id gen_pci_of_match[] = { { }, }; -MODULE_DEVICE_TABLE(of, gen_pci_of_match); - static int gen_pci_probe(struct platform_device *pdev) { const struct of_device_id *of_id; @@ -66,8 +63,4 @@ static struct platform_driver gen_pci_driver = { }, .probe = gen_pci_probe, }; -module_platform_driver(gen_pci_driver); - -MODULE_DESCRIPTION("Generic PCI host driver"); -MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(gen_pci_driver); diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 7e9b2de2aa24..6955ffdb89f3 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -732,16 +732,18 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info, pdev = msi_desc_to_pci_dev(msi); hbus = info->data; - hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn)); - if (!hpdev) + int_desc = irq_data_get_irq_chip_data(irq_data); + if (!int_desc) return; - int_desc = irq_data_get_irq_chip_data(irq_data); - if (int_desc) { - irq_data->chip_data = NULL; - hv_int_desc_free(hpdev, int_desc); + irq_data->chip_data = NULL; + hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn)); + if (!hpdev) { + kfree(int_desc); + return; } + hv_int_desc_free(hpdev, int_desc); put_pcichild(hpdev, hv_pcidev_ref_by_slot); } @@ -1657,14 +1659,16 @@ static void hv_pci_onchannelcallback(void *context) continue; } + /* Zero length indicates there are no more packets. */ + if (ret || !bytes_recvd) + break; + /* * All incoming packets must be at least as large as a * response. */ - if (bytes_recvd <= sizeof(struct pci_response)) { - kfree(buffer); - return; - } + if (bytes_recvd <= sizeof(struct pci_response)) + continue; desc = (struct vmpacket_descriptor *)buffer; switch (desc->type) { @@ -1679,8 +1683,7 @@ static void hv_pci_onchannelcallback(void *context) comp_packet->completion_func(comp_packet->compl_ctxt, response, bytes_recvd); - kfree(buffer); - return; + break; case VM_PKT_DATA_INBAND: @@ -1727,8 +1730,9 @@ static void hv_pci_onchannelcallback(void *context) desc->type, req_id, bytes_recvd); break; } - break; } + + kfree(buffer); } /** diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index 6b8301ef21ca..8ba28834d470 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -17,7 +17,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/msi.h> #include <linux/of_irq.h> #include <linux/of.h> @@ -360,7 +360,6 @@ static const struct of_device_id ks_pcie_of_match[] = { }, { }, }; -MODULE_DEVICE_TABLE(of, ks_pcie_of_match); static int __exit ks_pcie_remove(struct platform_device *pdev) { @@ -439,9 +438,4 @@ static struct platform_driver ks_pcie_driver __refdata = { .of_match_table = of_match_ptr(ks_pcie_of_match), }, }; - -module_platform_driver(ks_pcie_driver); - -MODULE_AUTHOR("Murali Karicheri <m-karicheri2@ti.com>"); -MODULE_DESCRIPTION("Keystone PCIe host controller driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(ks_pcie_driver); diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index a21e229d95e0..114ba819277a 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -12,7 +12,7 @@ #include <linux/kernel.h> #include <linux/interrupt.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of_pci.h> #include <linux/of_platform.h> #include <linux/of_irq.h> @@ -211,7 +211,6 @@ static const struct of_device_id ls_pcie_of_match[] = { { .compatible = "fsl,ls2085a-pcie", .data = &ls2080_drvdata }, { }, }; -MODULE_DEVICE_TABLE(of, ls_pcie_of_match); static int __init ls_add_pcie_port(struct pcie_port *pp, struct platform_device *pdev) @@ -275,9 +274,4 @@ static struct platform_driver ls_pcie_driver = { .of_match_table = ls_pcie_of_match, }, }; - -module_platform_driver_probe(ls_pcie_driver, ls_pcie_probe); - -MODULE_AUTHOR("Minghuan Lian <Minghuan.Lian@freescale.com>"); -MODULE_DESCRIPTION("Freescale Layerscape PCIe host controller driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver_probe(ls_pcie_driver, ls_pcie_probe); diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c index 6b451df6502c..307f81d6b479 100644 --- a/drivers/pci/host/pci-mvebu.c +++ b/drivers/pci/host/pci-mvebu.c @@ -1,6 +1,8 @@ /* * PCIe driver for Marvell Armada 370 and Armada XP SoCs * + * Author: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> + * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. @@ -11,7 +13,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/gpio.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/mbus.h> #include <linux/msi.h> #include <linux/slab.h> @@ -839,25 +841,22 @@ static struct pci_ops mvebu_pcie_ops = { static int mvebu_pcie_setup(int nr, struct pci_sys_data *sys) { struct mvebu_pcie *pcie = sys_to_pcie(sys); - int i; + int err, i; pcie->mem.name = "PCI MEM"; pcie->realio.name = "PCI I/O"; - if (request_resource(&iomem_resource, &pcie->mem)) - return 0; - - if (resource_size(&pcie->realio) != 0) { - if (request_resource(&ioport_resource, &pcie->realio)) { - release_resource(&pcie->mem); - return 0; - } + if (resource_size(&pcie->realio) != 0) pci_add_resource_offset(&sys->resources, &pcie->realio, sys->io_offset); - } + pci_add_resource_offset(&sys->resources, &pcie->mem, sys->mem_offset); pci_add_resource(&sys->resources, &pcie->busn); + err = devm_request_pci_bus_resources(&pcie->pdev->dev, &sys->resources); + if (err) + return 0; + for (i = 0; i < pcie->nports; i++) { struct mvebu_pcie_port *port = &pcie->ports[i]; @@ -1298,7 +1297,6 @@ static const struct of_device_id mvebu_pcie_of_match_table[] = { { .compatible = "marvell,kirkwood-pcie", }, {}, }; -MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table); static const struct dev_pm_ops mvebu_pcie_pm_ops = { SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mvebu_pcie_suspend, mvebu_pcie_resume) @@ -1314,8 +1312,4 @@ static struct platform_driver mvebu_pcie_driver = { }, .probe = mvebu_pcie_probe, }; -module_platform_driver(mvebu_pcie_driver); - -MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>"); -MODULE_DESCRIPTION("Marvell EBU PCIe driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(mvebu_pcie_driver); diff --git a/drivers/pci/host/pci-rcar-gen2.c b/drivers/pci/host/pci-rcar-gen2.c index 9980a4bdae7e..597566f96f5e 100644 --- a/drivers/pci/host/pci-rcar-gen2.c +++ b/drivers/pci/host/pci-rcar-gen2.c @@ -4,6 +4,8 @@ * Copyright (C) 2013 Renesas Solutions Corp. * Copyright (C) 2013 Cogent Embedded, Inc. * + * Author: Valentine Barshak <valentine.barshak@cogentembedded.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -14,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/of_address.h> #include <linux/of_pci.h> #include <linux/pci.h> @@ -97,7 +98,6 @@ struct rcar_pci_priv { struct device *dev; void __iomem *reg; - struct resource io_res; struct resource mem_res; struct resource *cfg_res; unsigned busnr; @@ -194,6 +194,7 @@ static int rcar_pci_setup(int nr, struct pci_sys_data *sys) struct rcar_pci_priv *priv = sys->private_data; void __iomem *reg = priv->reg; u32 val; + int ret; pm_runtime_enable(priv->dev); pm_runtime_get_sync(priv->dev); @@ -273,8 +274,10 @@ static int rcar_pci_setup(int nr, struct pci_sys_data *sys) rcar_pci_setup_errirq(priv); /* Add PCI resources */ - pci_add_resource(&sys->resources, &priv->io_res); pci_add_resource(&sys->resources, &priv->mem_res); + ret = devm_request_pci_bus_resources(priv->dev, &sys->resources); + if (ret < 0) + return ret; /* Setup bus number based on platform device id / of bus-range */ sys->busnr = priv->busnr; @@ -371,14 +374,6 @@ static int rcar_pci_probe(struct platform_device *pdev) return -ENOMEM; priv->mem_res = *mem_res; - /* - * The controller does not support/use port I/O, - * so setup a dummy port I/O region here. - */ - priv->io_res.start = priv->mem_res.start; - priv->io_res.end = priv->mem_res.end; - priv->io_res.flags = IORESOURCE_IO; - priv->cfg_res = cfg_res; priv->irq = platform_get_irq(pdev, 0); @@ -421,6 +416,7 @@ static int rcar_pci_probe(struct platform_device *pdev) hw_private[0] = priv; memset(&hw, 0, sizeof(hw)); hw.nr_controllers = ARRAY_SIZE(hw_private); + hw.io_optional = 1; hw.private_data = hw_private; hw.map_irq = rcar_pci_map_irq; hw.ops = &rcar_pci_ops; @@ -437,8 +433,6 @@ static struct of_device_id rcar_pci_of_match[] = { { }, }; -MODULE_DEVICE_TABLE(of, rcar_pci_of_match); - static struct platform_driver rcar_pci_driver = { .driver = { .name = "pci-rcar-gen2", @@ -447,9 +441,4 @@ static struct platform_driver rcar_pci_driver = { }, .probe = rcar_pci_probe, }; - -module_platform_driver(rcar_pci_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Renesas R-Car Gen2 internal PCI"); -MODULE_AUTHOR("Valentine Barshak <valentine.barshak@cogentembedded.com>"); +builtin_platform_driver(rcar_pci_driver); diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index c388468c202a..6de0757b11e4 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -9,6 +9,8 @@ * * Bits taken from arch/arm/mach-dove/pcie.c * + * Author: Thierry Reding <treding@nvidia.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -32,7 +34,7 @@ #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/msi.h> #include <linux/of_address.h> #include <linux/of_pci.h> @@ -183,26 +185,26 @@ #define AFI_PEXBIAS_CTRL_0 0x168 -#define RP_VEND_XP 0x00000F00 +#define RP_VEND_XP 0x00000f00 #define RP_VEND_XP_DL_UP (1 << 30) -#define RP_PRIV_MISC 0x00000FE0 -#define RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT (0xE << 0) -#define RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT (0xF << 0) +#define RP_PRIV_MISC 0x00000fe0 +#define RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT (0xe << 0) +#define RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT (0xf << 0) #define RP_LINK_CONTROL_STATUS 0x00000090 #define RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE 0x20000000 #define RP_LINK_CONTROL_STATUS_LINKSTAT_MASK 0x3fff0000 -#define PADS_CTL_SEL 0x0000009C +#define PADS_CTL_SEL 0x0000009c -#define PADS_CTL 0x000000A0 +#define PADS_CTL 0x000000a0 #define PADS_CTL_IDDQ_1L (1 << 0) #define PADS_CTL_TX_DATA_EN_1L (1 << 6) #define PADS_CTL_RX_DATA_EN_1L (1 << 10) -#define PADS_PLL_CTL_TEGRA20 0x000000B8 -#define PADS_PLL_CTL_TEGRA30 0x000000B4 +#define PADS_PLL_CTL_TEGRA20 0x000000b8 +#define PADS_PLL_CTL_TEGRA30 0x000000b4 #define PADS_PLL_CTL_RST_B4SM (1 << 1) #define PADS_PLL_CTL_LOCKDET (1 << 8) #define PADS_PLL_CTL_REFCLK_MASK (0x3 << 16) @@ -214,9 +216,9 @@ #define PADS_PLL_CTL_TXCLKREF_DIV5 (1 << 20) #define PADS_PLL_CTL_TXCLKREF_BUF_EN (1 << 22) -#define PADS_REFCLK_CFG0 0x000000C8 -#define PADS_REFCLK_CFG1 0x000000CC -#define PADS_REFCLK_BIAS 0x000000D0 +#define PADS_REFCLK_CFG0 0x000000c8 +#define PADS_REFCLK_CFG1 0x000000cc +#define PADS_REFCLK_BIAS 0x000000d0 /* * Fields in PADS_REFCLK_CFG*. Those registers form an array of 16-bit @@ -228,15 +230,6 @@ #define PADS_REFCLK_CFG_PREDI_SHIFT 8 /* 11:8 */ #define PADS_REFCLK_CFG_DRVI_SHIFT 12 /* 15:12 */ -/* Default value provided by HW engineering is 0xfa5c */ -#define PADS_REFCLK_CFG_VALUE \ - ( \ - (0x17 << PADS_REFCLK_CFG_TERM_SHIFT) | \ - (0 << PADS_REFCLK_CFG_E_TERM_SHIFT) | \ - (0xa << PADS_REFCLK_CFG_PREDI_SHIFT) | \ - (0xf << PADS_REFCLK_CFG_DRVI_SHIFT) \ - ) - struct tegra_msi { struct msi_controller chip; DECLARE_BITMAP(used, INT_PCI_MSI_NR); @@ -252,6 +245,8 @@ struct tegra_pcie_soc_data { unsigned int msi_base_shift; u32 pads_pll_ctl; u32 tx_ref_sel; + u32 pads_refclk_cfg0; + u32 pads_refclk_cfg1; bool has_pex_clkreq_en; bool has_pex_bias_ctrl; bool has_intr_prsnt_sense; @@ -274,7 +269,6 @@ struct tegra_pcie { struct list_head buses; struct resource *cs; - struct resource all; struct resource io; struct resource pio; struct resource mem; @@ -623,30 +617,21 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) sys->mem_offset = pcie->offset.mem; sys->io_offset = pcie->offset.io; - err = devm_request_resource(pcie->dev, &pcie->all, &pcie->io); - if (err < 0) - return err; - - err = devm_request_resource(pcie->dev, &ioport_resource, &pcie->pio); - if (err < 0) - return err; - - err = devm_request_resource(pcie->dev, &pcie->all, &pcie->mem); + err = devm_request_resource(pcie->dev, &iomem_resource, &pcie->io); if (err < 0) return err; - err = devm_request_resource(pcie->dev, &pcie->all, &pcie->prefetch); - if (err) - return err; - pci_add_resource_offset(&sys->resources, &pcie->pio, sys->io_offset); pci_add_resource_offset(&sys->resources, &pcie->mem, sys->mem_offset); pci_add_resource_offset(&sys->resources, &pcie->prefetch, sys->mem_offset); pci_add_resource(&sys->resources, &pcie->busn); - pci_ioremap_io(pcie->pio.start, pcie->io.start); + err = devm_request_pci_bus_resources(pcie->dev, &sys->resources); + if (err < 0) + return err; + pci_remap_iospace(&pcie->pio, pcie->io.start); return 1; } @@ -838,12 +823,6 @@ static int tegra_pcie_phy_enable(struct tegra_pcie *pcie) value |= PADS_PLL_CTL_RST_B4SM; pads_writel(pcie, value, soc->pads_pll_ctl); - /* Configure the reference clock driver */ - value = PADS_REFCLK_CFG_VALUE | (PADS_REFCLK_CFG_VALUE << 16); - pads_writel(pcie, value, PADS_REFCLK_CFG0); - if (soc->num_ports > 2) - pads_writel(pcie, PADS_REFCLK_CFG_VALUE, PADS_REFCLK_CFG1); - /* wait for the PLL to lock */ err = tegra_pcie_pll_wait(pcie, 500); if (err < 0) { @@ -927,6 +906,7 @@ static int tegra_pcie_port_phy_power_off(struct tegra_pcie_port *port) static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) { + const struct tegra_pcie_soc_data *soc = pcie->soc_data; struct tegra_pcie_port *port; int err; @@ -952,6 +932,12 @@ static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) } } + /* Configure the reference clock driver */ + pads_writel(pcie, soc->pads_refclk_cfg0, PADS_REFCLK_CFG0); + + if (soc->num_ports > 2) + pads_writel(pcie, soc->pads_refclk_cfg1, PADS_REFCLK_CFG1); + return 0; } @@ -1822,12 +1808,6 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) struct resource res; int err; - memset(&pcie->all, 0, sizeof(pcie->all)); - pcie->all.flags = IORESOURCE_MEM; - pcie->all.name = np->full_name; - pcie->all.start = ~0; - pcie->all.end = 0; - if (of_pci_range_parser_init(&parser, np)) { dev_err(pcie->dev, "missing \"ranges\" property\n"); return -EINVAL; @@ -1880,18 +1860,8 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) } break; } - - if (res.start <= pcie->all.start) - pcie->all.start = res.start; - - if (res.end >= pcie->all.end) - pcie->all.end = res.end; } - err = devm_request_resource(pcie->dev, &iomem_resource, &pcie->all); - if (err < 0) - return err; - err = of_pci_parse_bus_range(np, &pcie->busn); if (err < 0) { dev_err(pcie->dev, "failed to parse ranges property: %d\n", @@ -2078,6 +2048,7 @@ static const struct tegra_pcie_soc_data tegra20_pcie_data = { .msi_base_shift = 0, .pads_pll_ctl = PADS_PLL_CTL_TEGRA20, .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_DIV10, + .pads_refclk_cfg0 = 0xfa5cfa5c, .has_pex_clkreq_en = false, .has_pex_bias_ctrl = false, .has_intr_prsnt_sense = false, @@ -2090,6 +2061,8 @@ static const struct tegra_pcie_soc_data tegra30_pcie_data = { .msi_base_shift = 8, .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, + .pads_refclk_cfg0 = 0xfa5cfa5c, + .pads_refclk_cfg1 = 0xfa5cfa5c, .has_pex_clkreq_en = true, .has_pex_bias_ctrl = true, .has_intr_prsnt_sense = true, @@ -2102,6 +2075,7 @@ static const struct tegra_pcie_soc_data tegra124_pcie_data = { .msi_base_shift = 8, .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, + .pads_refclk_cfg0 = 0x44ac44ac, .has_pex_clkreq_en = true, .has_pex_bias_ctrl = true, .has_intr_prsnt_sense = true, @@ -2115,7 +2089,6 @@ static const struct of_device_id tegra_pcie_of_match[] = { { .compatible = "nvidia,tegra20-pcie", .data = &tegra20_pcie_data }, { }, }; -MODULE_DEVICE_TABLE(of, tegra_pcie_of_match); static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos) { @@ -2249,8 +2222,6 @@ static int tegra_pcie_probe(struct platform_device *pdev) if (err < 0) return err; - pcibios_min_mem = 0; - err = tegra_pcie_get_resources(pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request resources: %d\n", err); @@ -2306,8 +2277,4 @@ static struct platform_driver tegra_pcie_driver = { }, .probe = tegra_pcie_probe, }; -module_platform_driver(tegra_pcie_driver); - -MODULE_AUTHOR("Thierry Reding <treding@nvidia.com>"); -MODULE_DESCRIPTION("NVIDIA Tegra PCIe driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(tegra_pcie_driver); diff --git a/drivers/pci/host/pci-thunder-ecam.c b/drivers/pci/host/pci-thunder-ecam.c index 540d030613eb..d50a3dc2d8db 100644 --- a/drivers/pci/host/pci-thunder-ecam.c +++ b/drivers/pci/host/pci-thunder-ecam.c @@ -7,14 +7,13 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/ioport.h> #include <linux/of_pci.h> #include <linux/of.h> +#include <linux/pci-ecam.h> #include <linux/platform_device.h> -#include "../ecam.h" - static void set_val(u32 v, int where, int size, u32 *val) { int shift = (where & 3) * 8; @@ -360,7 +359,6 @@ static const struct of_device_id thunder_ecam_of_match[] = { { .compatible = "cavium,pci-host-thunder-ecam" }, { }, }; -MODULE_DEVICE_TABLE(of, thunder_ecam_of_match); static int thunder_ecam_probe(struct platform_device *pdev) { @@ -374,7 +372,4 @@ static struct platform_driver thunder_ecam_driver = { }, .probe = thunder_ecam_probe, }; -module_platform_driver(thunder_ecam_driver); - -MODULE_DESCRIPTION("Thunder ECAM PCI host driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(thunder_ecam_driver); diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 9b8ab94f3c8c..6abaf80ffb39 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -15,13 +15,12 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-ecam.h> #include <linux/platform_device.h> -#include "../ecam.h" - #define PEM_CFG_WR 0x28 #define PEM_CFG_RD 0x30 @@ -285,8 +284,9 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); } -static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg) +static int thunder_pem_init(struct pci_config_window *cfg) { + struct device *dev = cfg->parent; resource_size_t bar4_start; struct resource *res_pem; struct thunder_pem_pci *pem_pci; @@ -346,7 +346,6 @@ static const struct of_device_id thunder_pem_of_match[] = { { .compatible = "cavium,pci-host-thunder-pem" }, { }, }; -MODULE_DEVICE_TABLE(of, thunder_pem_of_match); static int thunder_pem_probe(struct platform_device *pdev) { @@ -360,7 +359,4 @@ static struct platform_driver thunder_pem_driver = { }, .probe = thunder_pem_probe, }; -module_platform_driver(thunder_pem_driver); - -MODULE_DESCRIPTION("Thunder PEM PCIe host driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(thunder_pem_driver); diff --git a/drivers/pci/host/pci-versatile.c b/drivers/pci/host/pci-versatile.c index f843a72dc51c..f234405770ab 100644 --- a/drivers/pci/host/pci-versatile.c +++ b/drivers/pci/host/pci-versatile.c @@ -80,21 +80,21 @@ static int versatile_pci_parse_request_of_pci_ranges(struct device *dev, if (err) return err; + err = devm_request_pci_bus_resources(dev, res); + if (err) + goto out_release_res; + resource_list_for_each_entry(win, res) { - struct resource *parent, *res = win->res; + struct resource *res = win->res; switch (resource_type(res)) { case IORESOURCE_IO: - parent = &ioport_resource; err = pci_remap_iospace(res, iobase); - if (err) { + if (err) dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); - continue; - } break; case IORESOURCE_MEM: - parent = &iomem_resource; res_valid |= !(res->flags & IORESOURCE_PREFETCH); writel(res->start >> 28, PCI_IMAP(mem)); @@ -102,23 +102,14 @@ static int versatile_pci_parse_request_of_pci_ranges(struct device *dev, mem++; break; - case IORESOURCE_BUS: - default: - continue; } - - err = devm_request_resource(dev, parent, res); - if (err) - goto out_release_res; } - if (!res_valid) { - dev_err(dev, "non-prefetchable memory resource required\n"); - err = -EINVAL; - goto out_release_res; - } + if (res_valid) + return 0; - return 0; + dev_err(dev, "non-prefetchable memory resource required\n"); + err = -EINVAL; out_release_res: pci_free_resource_list(res); diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index ae00ce22d5a6..a81273c23341 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -21,7 +21,7 @@ #include <linux/io.h> #include <linux/jiffies.h> #include <linux/memblock.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -540,14 +540,20 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (ret) return ret; + ret = devm_request_pci_bus_resources(&pdev->dev, &res); + if (ret) + goto error; + ret = xgene_pcie_setup(port, &res, iobase); if (ret) - return ret; + goto error; bus = pci_create_root_bus(&pdev->dev, 0, &xgene_pcie_ops, port, &res); - if (!bus) - return -ENOMEM; + if (!bus) { + ret = -ENOMEM; + goto error; + } pci_scan_child_bus(bus); pci_assign_unassigned_bus_resources(bus); @@ -555,6 +561,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) platform_set_drvdata(pdev, port); return 0; + +error: + pci_free_resource_list(&res); + return ret; } static const struct of_device_id xgene_pcie_match_table[] = { @@ -569,8 +579,4 @@ static struct platform_driver xgene_pcie_driver = { }, .probe = xgene_pcie_probe_bridge, }; -module_platform_driver(xgene_pcie_driver); - -MODULE_AUTHOR("Tanmay Inamdar <tinamdar@apm.com>"); -MODULE_DESCRIPTION("APM X-Gene PCIe driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(xgene_pcie_driver); diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index dbac6fb3f0bd..2b7837650db8 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -61,6 +61,8 @@ #define TLP_LOOP 500 #define RP_DEVFN 0 +#define LINK_UP_TIMEOUT 5000 + #define INTX_NUM 4 #define DWORD_MASK 3 @@ -81,9 +83,30 @@ struct tlp_rp_regpair_t { u32 reg1; }; +static inline void cra_writel(struct altera_pcie *pcie, const u32 value, + const u32 reg) +{ + writel_relaxed(value, pcie->cra_base + reg); +} + +static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) +{ + return readl_relaxed(pcie->cra_base + reg); +} + +static bool altera_pcie_link_is_up(struct altera_pcie *pcie) +{ + return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0); +} + static void altera_pcie_retrain(struct pci_dev *dev) { u16 linkcap, linkstat; + struct altera_pcie *pcie = dev->bus->sysdata; + int timeout = 0; + + if (!altera_pcie_link_is_up(pcie)) + return; /* * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but @@ -95,9 +118,16 @@ static void altera_pcie_retrain(struct pci_dev *dev) return; pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &linkstat); - if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) + if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) { pcie_capability_set_word(dev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL); + while (!altera_pcie_link_is_up(pcie)) { + timeout++; + if (timeout > LINK_UP_TIMEOUT) + break; + udelay(5); + } + } } DECLARE_PCI_FIXUP_EARLY(0x1172, PCI_ANY_ID, altera_pcie_retrain); @@ -120,17 +150,6 @@ static bool altera_pcie_hide_rc_bar(struct pci_bus *bus, unsigned int devfn, return false; } -static inline void cra_writel(struct altera_pcie *pcie, const u32 value, - const u32 reg) -{ - writel_relaxed(value, pcie->cra_base + reg); -} - -static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) -{ - return readl_relaxed(pcie->cra_base + reg); -} - static void tlp_write_tx(struct altera_pcie *pcie, struct tlp_rp_regpair_t *tlp_rp_regdata) { @@ -139,11 +158,6 @@ static void tlp_write_tx(struct altera_pcie *pcie, cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL); } -static bool altera_pcie_link_is_up(struct altera_pcie *pcie) -{ - return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0); -} - static bool altera_pcie_valid_config(struct altera_pcie *pcie, struct pci_bus *bus, int dev) { @@ -415,11 +429,6 @@ static void altera_pcie_isr(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void altera_pcie_release_of_pci_ranges(struct altera_pcie *pcie) -{ - pci_free_resource_list(&pcie->resources); -} - static int altera_pcie_parse_request_of_pci_ranges(struct altera_pcie *pcie) { int err, res_valid = 0; @@ -432,33 +441,25 @@ static int altera_pcie_parse_request_of_pci_ranges(struct altera_pcie *pcie) if (err) return err; + err = devm_request_pci_bus_resources(dev, &pcie->resources); + if (err) + goto out_release_res; + resource_list_for_each_entry(win, &pcie->resources) { - struct resource *parent, *res = win->res; + struct resource *res = win->res; - switch (resource_type(res)) { - case IORESOURCE_MEM: - parent = &iomem_resource; + if (resource_type(res) == IORESOURCE_MEM) res_valid |= !(res->flags & IORESOURCE_PREFETCH); - break; - default: - continue; - } - - err = devm_request_resource(dev, parent, res); - if (err) - goto out_release_res; } - if (!res_valid) { - dev_err(dev, "non-prefetchable memory resource required\n"); - err = -EINVAL; - goto out_release_res; - } + if (res_valid) + return 0; - return 0; + dev_err(dev, "non-prefetchable memory resource required\n"); + err = -EINVAL; out_release_res: - altera_pcie_release_of_pci_ranges(pcie); + pci_free_resource_list(&pcie->resources); return err; } diff --git a/drivers/pci/host/pcie-armada8k.c b/drivers/pci/host/pcie-armada8k.c index 55723567b5d4..0f4f570068e3 100644 --- a/drivers/pci/host/pcie-armada8k.c +++ b/drivers/pci/host/pcie-armada8k.c @@ -5,6 +5,9 @@ * * Copyright (C) 2016 Marvell Technology Group Ltd. * + * Author: Yehuda Yitshak <yehuday@marvell.com> + * Author: Shadi Ammouri <shadi@marvell.com> + * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. @@ -14,7 +17,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of.h> #include <linux/pci.h> #include <linux/phy/phy.h> @@ -244,7 +247,6 @@ static const struct of_device_id armada8k_pcie_of_match[] = { { .compatible = "marvell,armada8k-pcie", }, {}, }; -MODULE_DEVICE_TABLE(of, armada8k_pcie_of_match); static struct platform_driver armada8k_pcie_driver = { .probe = armada8k_pcie_probe, @@ -253,10 +255,4 @@ static struct platform_driver armada8k_pcie_driver = { .of_match_table = of_match_ptr(armada8k_pcie_of_match), }, }; - -module_platform_driver(armada8k_pcie_driver); - -MODULE_DESCRIPTION("Armada 8k PCIe host controller driver"); -MODULE_AUTHOR("Yehuda Yitshak <yehuday@marvell.com>"); -MODULE_AUTHOR("Shadi Ammouri <shadi@marvell.com>"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(armada8k_pcie_driver); diff --git a/drivers/pci/host/pcie-artpec6.c b/drivers/pci/host/pcie-artpec6.c new file mode 100644 index 000000000000..16ba70b7ec65 --- /dev/null +++ b/drivers/pci/host/pcie-artpec6.c @@ -0,0 +1,280 @@ +/* + * PCIe host controller driver for Axis ARTPEC-6 SoC + * + * Author: Niklas Cassel <niklas.cassel@axis.com> + * + * Based on work done by Phil Edworthy <phil@edworthys.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/resource.h> +#include <linux/signal.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> + +#include "pcie-designware.h" + +#define to_artpec6_pcie(x) container_of(x, struct artpec6_pcie, pp) + +struct artpec6_pcie { + struct pcie_port pp; + struct regmap *regmap; + void __iomem *phy_base; +}; + +/* PCIe Port Logic registers (memory-mapped) */ +#define PL_OFFSET 0x700 +#define PCIE_PHY_DEBUG_R0 (PL_OFFSET + 0x28) +#define PCIE_PHY_DEBUG_R1 (PL_OFFSET + 0x2c) + +#define MISC_CONTROL_1_OFF (PL_OFFSET + 0x1bc) +#define DBI_RO_WR_EN 1 + +/* ARTPEC-6 specific registers */ +#define PCIECFG 0x18 +#define PCIECFG_DBG_OEN (1 << 24) +#define PCIECFG_CORE_RESET_REQ (1 << 21) +#define PCIECFG_LTSSM_ENABLE (1 << 20) +#define PCIECFG_CLKREQ_B (1 << 11) +#define PCIECFG_REFCLK_ENABLE (1 << 10) +#define PCIECFG_PLL_ENABLE (1 << 9) +#define PCIECFG_PCLK_ENABLE (1 << 8) +#define PCIECFG_RISRCREN (1 << 4) +#define PCIECFG_MODE_TX_DRV_EN (1 << 3) +#define PCIECFG_CISRREN (1 << 2) +#define PCIECFG_MACRO_ENABLE (1 << 0) + +#define NOCCFG 0x40 +#define NOCCFG_ENABLE_CLK_PCIE (1 << 4) +#define NOCCFG_POWER_PCIE_IDLEACK (1 << 3) +#define NOCCFG_POWER_PCIE_IDLE (1 << 2) +#define NOCCFG_POWER_PCIE_IDLEREQ (1 << 1) + +#define PHY_STATUS 0x118 +#define PHY_COSPLLLOCK (1 << 0) + +#define ARTPEC6_CPU_TO_BUS_ADDR 0x0fffffff + +static int artpec6_pcie_establish_link(struct pcie_port *pp) +{ + struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pp); + u32 val; + unsigned int retries; + + /* Hold DW core in reset */ + regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val |= PCIECFG_CORE_RESET_REQ; + regmap_write(artpec6_pcie->regmap, PCIECFG, val); + + regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val |= PCIECFG_RISRCREN | /* Receiver term. 50 Ohm */ + PCIECFG_MODE_TX_DRV_EN | + PCIECFG_CISRREN | /* Reference clock term. 100 Ohm */ + PCIECFG_MACRO_ENABLE; + val |= PCIECFG_REFCLK_ENABLE; + val &= ~PCIECFG_DBG_OEN; + val &= ~PCIECFG_CLKREQ_B; + regmap_write(artpec6_pcie->regmap, PCIECFG, val); + usleep_range(5000, 6000); + + regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + val |= NOCCFG_ENABLE_CLK_PCIE; + regmap_write(artpec6_pcie->regmap, NOCCFG, val); + usleep_range(20, 30); + + regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val |= PCIECFG_PCLK_ENABLE | PCIECFG_PLL_ENABLE; + regmap_write(artpec6_pcie->regmap, PCIECFG, val); + usleep_range(6000, 7000); + + regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + val &= ~NOCCFG_POWER_PCIE_IDLEREQ; + regmap_write(artpec6_pcie->regmap, NOCCFG, val); + + retries = 50; + do { + usleep_range(1000, 2000); + regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + retries--; + } while (retries && + (val & (NOCCFG_POWER_PCIE_IDLEACK | NOCCFG_POWER_PCIE_IDLE))); + + retries = 50; + do { + usleep_range(1000, 2000); + val = readl(artpec6_pcie->phy_base + PHY_STATUS); + retries--; + } while (retries && !(val & PHY_COSPLLLOCK)); + + /* Take DW core out of reset */ + regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val &= ~PCIECFG_CORE_RESET_REQ; + regmap_write(artpec6_pcie->regmap, PCIECFG, val); + usleep_range(100, 200); + + /* + * Enable writing to config regs. This is required as the Synopsys + * driver changes the class code. That register needs DBI write enable. + */ + writel(DBI_RO_WR_EN, pp->dbi_base + MISC_CONTROL_1_OFF); + + pp->io_base &= ARTPEC6_CPU_TO_BUS_ADDR; + pp->mem_base &= ARTPEC6_CPU_TO_BUS_ADDR; + pp->cfg0_base &= ARTPEC6_CPU_TO_BUS_ADDR; + pp->cfg1_base &= ARTPEC6_CPU_TO_BUS_ADDR; + + /* setup root complex */ + dw_pcie_setup_rc(pp); + + /* assert LTSSM enable */ + regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val |= PCIECFG_LTSSM_ENABLE; + regmap_write(artpec6_pcie->regmap, PCIECFG, val); + + /* check if the link is up or not */ + if (!dw_pcie_wait_for_link(pp)) + return 0; + + dev_dbg(pp->dev, "DEBUG_R0: 0x%08x, DEBUG_R1: 0x%08x\n", + readl(pp->dbi_base + PCIE_PHY_DEBUG_R0), + readl(pp->dbi_base + PCIE_PHY_DEBUG_R1)); + + return -ETIMEDOUT; +} + +static void artpec6_pcie_enable_interrupts(struct pcie_port *pp) +{ + if (IS_ENABLED(CONFIG_PCI_MSI)) + dw_pcie_msi_init(pp); +} + +static void artpec6_pcie_host_init(struct pcie_port *pp) +{ + artpec6_pcie_establish_link(pp); + artpec6_pcie_enable_interrupts(pp); +} + +static int artpec6_pcie_link_up(struct pcie_port *pp) +{ + u32 rc; + + /* + * Get status from Synopsys IP + * link is debug bit 36, debug register 1 starts at bit 32 + */ + rc = readl(pp->dbi_base + PCIE_PHY_DEBUG_R1) & (0x1 << (36 - 32)); + if (rc) + return 1; + + return 0; +} + +static struct pcie_host_ops artpec6_pcie_host_ops = { + .link_up = artpec6_pcie_link_up, + .host_init = artpec6_pcie_host_init, +}; + +static irqreturn_t artpec6_pcie_msi_handler(int irq, void *arg) +{ + struct pcie_port *pp = arg; + + return dw_handle_msi_irq(pp); +} + +static int __init artpec6_add_pcie_port(struct pcie_port *pp, + struct platform_device *pdev) +{ + int ret; + + if (IS_ENABLED(CONFIG_PCI_MSI)) { + pp->msi_irq = platform_get_irq_byname(pdev, "msi"); + if (pp->msi_irq <= 0) { + dev_err(&pdev->dev, "failed to get MSI irq\n"); + return -ENODEV; + } + + ret = devm_request_irq(&pdev->dev, pp->msi_irq, + artpec6_pcie_msi_handler, + IRQF_SHARED | IRQF_NO_THREAD, + "artpec6-pcie-msi", pp); + if (ret) { + dev_err(&pdev->dev, "failed to request MSI irq\n"); + return ret; + } + } + + pp->root_bus_nr = -1; + pp->ops = &artpec6_pcie_host_ops; + + ret = dw_pcie_host_init(pp); + if (ret) { + dev_err(&pdev->dev, "failed to initialize host\n"); + return ret; + } + + return 0; +} + +static int artpec6_pcie_probe(struct platform_device *pdev) +{ + struct artpec6_pcie *artpec6_pcie; + struct pcie_port *pp; + struct resource *dbi_base; + struct resource *phy_base; + int ret; + + artpec6_pcie = devm_kzalloc(&pdev->dev, sizeof(*artpec6_pcie), + GFP_KERNEL); + if (!artpec6_pcie) + return -ENOMEM; + + pp = &artpec6_pcie->pp; + pp->dev = &pdev->dev; + + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); + pp->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_base); + if (IS_ERR(pp->dbi_base)) + return PTR_ERR(pp->dbi_base); + + phy_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "phy"); + artpec6_pcie->phy_base = devm_ioremap_resource(&pdev->dev, phy_base); + if (IS_ERR(artpec6_pcie->phy_base)) + return PTR_ERR(artpec6_pcie->phy_base); + + artpec6_pcie->regmap = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "axis,syscon-pcie"); + if (IS_ERR(artpec6_pcie->regmap)) + return PTR_ERR(artpec6_pcie->regmap); + + ret = artpec6_add_pcie_port(pp, pdev); + if (ret < 0) + return ret; + + platform_set_drvdata(pdev, artpec6_pcie); + return 0; +} + +static const struct of_device_id artpec6_pcie_of_match[] = { + { .compatible = "axis,artpec6-pcie", }, + {}, +}; + +static struct platform_driver artpec6_pcie_driver = { + .probe = artpec6_pcie_probe, + .driver = { + .name = "artpec6-pcie", + .of_match_table = artpec6_pcie_of_match, + }, +}; +builtin_platform_driver(artpec6_pcie_driver); diff --git a/drivers/pci/host/pcie-designware-plat.c b/drivers/pci/host/pcie-designware-plat.c index b3500994d08a..c8079dc81c10 100644 --- a/drivers/pci/host/pcie-designware-plat.c +++ b/drivers/pci/host/pcie-designware-plat.c @@ -14,7 +14,7 @@ #include <linux/gpio.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of_gpio.h> #include <linux/pci.h> #include <linux/platform_device.h> @@ -121,7 +121,6 @@ static const struct of_device_id dw_plat_pcie_of_match[] = { { .compatible = "snps,dw-pcie", }, {}, }; -MODULE_DEVICE_TABLE(of, dw_plat_pcie_of_match); static struct platform_driver dw_plat_pcie_driver = { .driver = { @@ -130,9 +129,4 @@ static struct platform_driver dw_plat_pcie_driver = { }, .probe = dw_plat_pcie_probe, }; - -module_platform_driver(dw_plat_pcie_driver); - -MODULE_AUTHOR("Joao Pinto <Joao.Pinto@synopsys.com>"); -MODULE_DESCRIPTION("Synopsys PCIe host controller glue platform driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(dw_plat_pcie_driver); diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index aafd766546f3..12afce19890b 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -452,6 +452,10 @@ int dw_pcie_host_init(struct pcie_port *pp) if (ret) return ret; + ret = devm_request_pci_bus_resources(&pdev->dev, &res); + if (ret) + goto error; + /* Get the I/O and memory ranges from DT */ resource_list_for_each_entry(win, &res) { switch (resource_type(win->res)) { @@ -461,11 +465,9 @@ int dw_pcie_host_init(struct pcie_port *pp) pp->io_size = resource_size(pp->io); pp->io_bus_addr = pp->io->start - win->offset; ret = pci_remap_iospace(pp->io, pp->io_base); - if (ret) { + if (ret) dev_warn(pp->dev, "error %d: failed to map resource %pR\n", ret, pp->io); - continue; - } break; case IORESOURCE_MEM: pp->mem = win->res; @@ -483,8 +485,6 @@ int dw_pcie_host_init(struct pcie_port *pp) case IORESOURCE_BUS: pp->busn = win->res; break; - default: - continue; } } @@ -493,7 +493,8 @@ int dw_pcie_host_init(struct pcie_port *pp) resource_size(pp->cfg)); if (!pp->dbi_base) { dev_err(pp->dev, "error with ioremap\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } } @@ -504,7 +505,8 @@ int dw_pcie_host_init(struct pcie_port *pp) pp->cfg0_size); if (!pp->va_cfg0_base) { dev_err(pp->dev, "error with ioremap in function\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } } @@ -513,7 +515,8 @@ int dw_pcie_host_init(struct pcie_port *pp) pp->cfg1_size); if (!pp->va_cfg1_base) { dev_err(pp->dev, "error with ioremap\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } } @@ -528,7 +531,8 @@ int dw_pcie_host_init(struct pcie_port *pp) &dw_pcie_msi_chip); if (!pp->irq_domain) { dev_err(pp->dev, "irq domain init failed\n"); - return -ENXIO; + ret = -ENXIO; + goto error; } for (i = 0; i < MAX_MSI_IRQS; i++) @@ -536,7 +540,7 @@ int dw_pcie_host_init(struct pcie_port *pp) } else { ret = pp->ops->msi_host_init(pp, &dw_pcie_msi_chip); if (ret < 0) - return ret; + goto error; } } @@ -552,8 +556,10 @@ int dw_pcie_host_init(struct pcie_port *pp) } else bus = pci_scan_root_bus(pp->dev, pp->root_bus_nr, &dw_pcie_ops, pp, &res); - if (!bus) - return -ENOMEM; + if (!bus) { + ret = -ENOMEM; + goto error; + } if (pp->ops->scan_bus) pp->ops->scan_bus(pp); @@ -571,6 +577,10 @@ int dw_pcie_host_init(struct pcie_port *pp) pci_bus_add_devices(bus); return 0; + +error: + pci_free_resource_list(&res); + return ret; } static int dw_pcie_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus, diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c index 3e98d4edae2d..7ee9dfcc45fb 100644 --- a/drivers/pci/host/pcie-hisi.c +++ b/drivers/pci/host/pcie-hisi.c @@ -12,7 +12,7 @@ * published by the Free Software Foundation. */ #include <linux/interrupt.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/mfd/syscon.h> #include <linux/of_address.h> #include <linux/of_pci.h> @@ -235,9 +235,6 @@ static const struct of_device_id hisi_pcie_of_match[] = { {}, }; - -MODULE_DEVICE_TABLE(of, hisi_pcie_of_match); - static struct platform_driver hisi_pcie_driver = { .probe = hisi_pcie_probe, .driver = { @@ -245,10 +242,4 @@ static struct platform_driver hisi_pcie_driver = { .of_match_table = hisi_pcie_of_match, }, }; - -module_platform_driver(hisi_pcie_driver); - -MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>"); -MODULE_AUTHOR("Dacai Zhu <zhudacai@hisilicon.com>"); -MODULE_AUTHOR("Gabriele Paoloni <gabriele.paoloni@huawei.com>"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(hisi_pcie_driver); diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index a576aeeb22da..e167b2f0098d 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -462,6 +462,10 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) if (!pcie || !pcie->dev || !pcie->base) return -EINVAL; + ret = devm_request_pci_bus_resources(pcie->dev, res); + if (ret) + return ret; + ret = phy_init(pcie->phy); if (ret) { dev_err(pcie->dev, "unable to initialize PCIe PHY\n"); diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index 35092188039b..65db7a221509 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -7,6 +7,8 @@ * arch/sh/drivers/pci/ops-sh7786.c * Copyright (C) 2009 - 2011 Paul Mundt * + * Author: Phil Edworthy <phil.edworthy@renesas.com> + * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. @@ -18,7 +20,7 @@ #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/msi.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -936,12 +938,6 @@ static const struct of_device_id rcar_pcie_of_match[] = { { .compatible = "renesas,pcie-r8a7795", .data = rcar_pcie_hw_init }, {}, }; -MODULE_DEVICE_TABLE(of, rcar_pcie_of_match); - -static void rcar_pcie_release_of_pci_ranges(struct rcar_pcie *pci) -{ - pci_free_resource_list(&pci->resources); -} static int rcar_pcie_parse_request_of_pci_ranges(struct rcar_pcie *pci) { @@ -955,37 +951,25 @@ static int rcar_pcie_parse_request_of_pci_ranges(struct rcar_pcie *pci) if (err) return err; + err = devm_request_pci_bus_resources(dev, &pci->resources); + if (err) + goto out_release_res; + resource_list_for_each_entry(win, &pci->resources) { - struct resource *parent, *res = win->res; + struct resource *res = win->res; - switch (resource_type(res)) { - case IORESOURCE_IO: - parent = &ioport_resource; + if (resource_type(res) == IORESOURCE_IO) { err = pci_remap_iospace(res, iobase); - if (err) { + if (err) dev_warn(dev, "error %d: failed to map resource %pR\n", err, res); - continue; - } - break; - case IORESOURCE_MEM: - parent = &iomem_resource; - break; - - case IORESOURCE_BUS: - default: - continue; } - - err = devm_request_resource(dev, parent, res); - if (err) - goto out_release_res; } return 0; out_release_res: - rcar_pcie_release_of_pci_ranges(pci); + pci_free_resource_list(&pci->resources); return err; } @@ -1073,8 +1057,4 @@ static struct platform_driver rcar_pcie_driver = { }, .probe = rcar_pcie_probe, }; -module_platform_driver(rcar_pcie_driver); - -MODULE_AUTHOR("Phil Edworthy <phil.edworthy@renesas.com>"); -MODULE_DESCRIPTION("Renesas R-Car PCIe driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(rcar_pcie_driver); diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c index 3479d30e2be8..0b597d9190b4 100644 --- a/drivers/pci/host/pcie-xilinx-nwl.c +++ b/drivers/pci/host/pcie-xilinx-nwl.c @@ -825,27 +825,33 @@ static int nwl_pcie_probe(struct platform_device *pdev) err = of_pci_get_host_bridge_resources(node, 0, 0xff, &res, &iobase); if (err) { - pr_err("Getting bridge resources failed\n"); + dev_err(pcie->dev, "Getting bridge resources failed\n"); return err; } + err = devm_request_pci_bus_resources(pcie->dev, &res); + if (err) + goto error; + err = nwl_pcie_init_irq_domain(pcie); if (err) { dev_err(pcie->dev, "Failed creating IRQ Domain\n"); - return err; + goto error; } bus = pci_create_root_bus(&pdev->dev, pcie->root_busno, &nwl_pcie_ops, pcie, &res); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto error; + } if (IS_ENABLED(CONFIG_PCI_MSI)) { err = nwl_pcie_enable_msi(pcie, bus); if (err < 0) { dev_err(&pdev->dev, "failed to enable MSI support: %d\n", err); - return err; + goto error; } } pci_scan_child_bus(bus); @@ -855,6 +861,10 @@ static int nwl_pcie_probe(struct platform_device *pdev) pci_bus_add_devices(bus); platform_set_drvdata(pdev, pcie); return 0; + +error: + pci_free_resource_list(&res); + return err; } static int nwl_pcie_remove(struct platform_device *pdev) diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index 65f0fe0c2eaf..a30e01639557 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ -550,7 +550,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) pcie_intc_node = of_get_next_child(node, NULL); if (!pcie_intc_node) { dev_err(dev, "No PCIe Intc node found\n"); - return PTR_ERR(pcie_intc_node); + return -ENODEV; } port->irq_domain = irq_domain_add_linear(pcie_intc_node, 4, @@ -558,7 +558,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) port); if (!port->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); - return PTR_ERR(port->irq_domain); + return -ENODEV; } /* Setup MSI */ @@ -569,7 +569,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) &xilinx_pcie_msi_chip); if (!port->irq_domain) { dev_err(dev, "Failed to get a MSI IRQ domain\n"); - return PTR_ERR(port->irq_domain); + return -ENODEV; } xilinx_pcie_enable_msi(port); @@ -660,7 +660,6 @@ static int xilinx_pcie_probe(struct platform_device *pdev) struct xilinx_pcie_port *port; struct device *dev = &pdev->dev; struct pci_bus *bus; - int err; resource_size_t iobase = 0; LIST_HEAD(res); @@ -694,10 +693,17 @@ static int xilinx_pcie_probe(struct platform_device *pdev) dev_err(dev, "Getting bridge resources failed\n"); return err; } + + err = devm_request_pci_bus_resources(dev, &res); + if (err) + goto error; + bus = pci_create_root_bus(&pdev->dev, 0, &xilinx_pcie_ops, port, &res); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto error; + } #ifdef CONFIG_PCI_MSI xilinx_pcie_msi_chip.dev = port->dev; @@ -712,6 +718,10 @@ static int xilinx_pcie_probe(struct platform_device *pdev) platform_set_drvdata(pdev, port); return 0; + +error: + pci_free_resource_list(&res); + return err; } /** diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index fa49f9143b80..a46b585fae31 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -675,6 +675,9 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) if (bridge->is_going_away) return; + if (bridge->pci_dev) + pm_runtime_get_sync(&bridge->pci_dev->dev); + list_for_each_entry(slot, &bridge->slots, node) { struct pci_bus *bus = slot->bus; struct pci_dev *dev, *tmp; @@ -694,6 +697,9 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) disable_slot(slot); } } + + if (bridge->pci_dev) + pm_runtime_put(&bridge->pci_dev->dev); } /* diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 5c24e938042f..08e84d61874e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -546,6 +546,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) u8 present; bool link; + /* Interrupts cannot originate from a controller that's asleep */ + if (pdev->current_state == PCI_D3cold) + return IRQ_NONE; + /* * In order to guarantee that all interrupt events are * serviced, we need to re-inspect Slot Status register after diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a080f4496fe2..a02981efdad5 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -4,6 +4,7 @@ * * Copyright (C) 2003-2004 Intel * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + * Copyright (C) 2016 Christoph Hellwig. */ #include <linux/err.h> @@ -207,6 +208,12 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag); } +static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) +{ + return desc->mask_base + + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; +} + /* * This internal function does not flush PCI writes to the device. * All users must ensure that they read from the device before either @@ -217,8 +224,6 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; - unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; if (pci_msi_ignore_mask) return 0; @@ -226,7 +231,7 @@ u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; if (flag) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - writel(mask_bits, desc->mask_base + offset); + writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); return mask_bits; } @@ -284,8 +289,7 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) BUG_ON(dev->current_state != PCI_D0); if (entry->msi_attrib.is_msix) { - void __iomem *base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + void __iomem *base = pci_msix_desc_addr(entry); msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); @@ -315,9 +319,7 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (dev->current_state != PCI_D0) { /* Don't touch the hardware now */ } else if (entry->msi_attrib.is_msix) { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + void __iomem *base = pci_msix_desc_addr(entry); writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); @@ -567,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); entry->nvec_used = nvec; + entry->affinity = dev->irq_affinity; if (control & PCI_MSI_FLAGS_64BIT) entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; @@ -678,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msix_entry *entries, int nvec) { + const struct cpumask *mask = NULL; struct msi_desc *entry; - int i; + int cpu = -1, i; for (i = 0; i < nvec; i++) { + if (dev->irq_affinity) { + cpu = cpumask_next(cpu, dev->irq_affinity); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(dev->irq_affinity); + mask = cpumask_of(cpu); + } + entry = alloc_msi_entry(&dev->dev); if (!entry) { if (!i) @@ -694,10 +705,14 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; - entry->msi_attrib.entry_nr = entries[i].entry; + if (entries) + entry->msi_attrib.entry_nr = entries[i].entry; + else + entry->msi_attrib.entry_nr = i; entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; entry->nvec_used = 1; + entry->affinity = mask; list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); } @@ -712,13 +727,11 @@ static void msix_program_entries(struct pci_dev *dev, int i = 0; for_each_pci_msi_entry(entry, dev) { - int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - - entries[i].vector = entry->irq; - entry->masked = readl(entry->mask_base + offset); + if (entries) + entries[i++].vector = entry->irq; + entry->masked = readl(pci_msix_desc_addr(entry) + + PCI_MSIX_ENTRY_VECTOR_CTRL); msix_mask_irq(entry, 1); - i++; } } @@ -931,7 +944,7 @@ EXPORT_SYMBOL(pci_msix_vec_count); /** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function - * @entries: pointer to an array of MSI-X entries + * @entries: pointer to an array of MSI-X entries (optional) * @nvec: number of MSI-X irqs requested for allocation by device driver * * Setup the MSI-X capability structure of device function with the number @@ -951,22 +964,21 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) if (!pci_msi_supported(dev, nvec)) return -EINVAL; - if (!entries) - return -EINVAL; - nr_entries = pci_msix_vec_count(dev); if (nr_entries < 0) return nr_entries; if (nvec > nr_entries) return nr_entries; - /* Check for any invalid entries */ - for (i = 0; i < nvec; i++) { - if (entries[i].entry >= nr_entries) - return -EINVAL; /* invalid entry */ - for (j = i + 1; j < nvec; j++) { - if (entries[i].entry == entries[j].entry) - return -EINVAL; /* duplicate entry */ + if (entries) { + /* Check for any invalid entries */ + for (i = 0; i < nvec; i++) { + if (entries[i].entry >= nr_entries) + return -EINVAL; /* invalid entry */ + for (j = i + 1; j < nvec; j++) { + if (entries[i].entry == entries[j].entry) + return -EINVAL; /* duplicate entry */ + } } } WARN_ON(!!dev->msix_enabled); @@ -1026,19 +1038,8 @@ int pci_msi_enabled(void) } EXPORT_SYMBOL(pci_msi_enabled); -/** - * pci_enable_msi_range - configure device's MSI capability structure - * @dev: device to configure - * @minvec: minimal number of interrupts to configure - * @maxvec: maximum number of interrupts to configure - * - * This function tries to allocate a maximum possible number of interrupts in a - * range between @minvec and @maxvec. It returns a negative errno if an error - * occurs. If it succeeds, it returns the actual number of interrupts allocated - * and updates the @dev's irq member to the lowest new interrupt number; - * the other interrupt numbers allocated to this device are consecutive. - **/ -int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) +static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, + unsigned int flags) { int nvec; int rc; @@ -1061,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) nvec = pci_msi_vec_count(dev); if (nvec < 0) return nvec; - else if (nvec < minvec) + if (nvec < minvec) return -EINVAL; - else if (nvec > maxvec) + + if (nvec > maxvec) nvec = maxvec; - do { + for (;;) { + if (!(flags & PCI_IRQ_NOAFFINITY)) { + dev->irq_affinity = irq_create_affinity_mask(&nvec); + if (nvec < minvec) + return -ENOSPC; + } + rc = msi_capability_init(dev, nvec); - if (rc < 0) { + if (rc == 0) + return nvec; + + kfree(dev->irq_affinity); + dev->irq_affinity = NULL; + + if (rc < 0) return rc; - } else if (rc > 0) { - if (rc < minvec) + if (rc < minvec) + return -ENOSPC; + + nvec = rc; + } +} + +/** + * pci_enable_msi_range - configure device's MSI capability structure + * @dev: device to configure + * @minvec: minimal number of interrupts to configure + * @maxvec: maximum number of interrupts to configure + * + * This function tries to allocate a maximum possible number of interrupts in a + * range between @minvec and @maxvec. It returns a negative errno if an error + * occurs. If it succeeds, it returns the actual number of interrupts allocated + * and updates the @dev's irq member to the lowest new interrupt number; + * the other interrupt numbers allocated to this device are consecutive. + **/ +int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) +{ + return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY); +} +EXPORT_SYMBOL(pci_enable_msi_range); + +static int __pci_enable_msix_range(struct pci_dev *dev, + struct msix_entry *entries, int minvec, int maxvec, + unsigned int flags) +{ + int nvec = maxvec; + int rc; + + if (maxvec < minvec) + return -ERANGE; + + for (;;) { + if (!(flags & PCI_IRQ_NOAFFINITY)) { + dev->irq_affinity = irq_create_affinity_mask(&nvec); + if (nvec < minvec) return -ENOSPC; - nvec = rc; } - } while (rc); - return nvec; + rc = pci_enable_msix(dev, entries, nvec); + if (rc == 0) + return nvec; + + kfree(dev->irq_affinity); + dev->irq_affinity = NULL; + + if (rc < 0) + return rc; + if (rc < minvec) + return -ENOSPC; + + nvec = rc; + } } -EXPORT_SYMBOL(pci_enable_msi_range); /** * pci_enable_msix_range - configure device's MSI-X capability structure @@ -1097,28 +1158,101 @@ EXPORT_SYMBOL(pci_enable_msi_range); * with new allocated MSI-X interrupts. **/ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, - int minvec, int maxvec) + int minvec, int maxvec) { - int nvec = maxvec; - int rc; + return __pci_enable_msix_range(dev, entries, minvec, maxvec, + PCI_IRQ_NOAFFINITY); +} +EXPORT_SYMBOL(pci_enable_msix_range); - if (maxvec < minvec) - return -ERANGE; +/** + * pci_alloc_irq_vectors - allocate multiple IRQs for a device + * @dev: PCI device to operate on + * @min_vecs: minimum number of vectors required (must be >= 1) + * @max_vecs: maximum (desired) number of vectors + * @flags: flags or quirks for the allocation + * + * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI + * vectors if available, and fall back to a single legacy vector + * if neither is available. Return the number of vectors allocated, + * (which might be smaller than @max_vecs) if successful, or a negative + * error code on error. If less than @min_vecs interrupt vectors are + * available for @dev the function will fail with -ENOSPC. + * + * To get the Linux IRQ number used for a vector that can be passed to + * request_irq() use the pci_irq_vector() helper. + */ +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + int vecs = -ENOSPC; - do { - rc = pci_enable_msix(dev, entries, nvec); - if (rc < 0) { - return rc; - } else if (rc > 0) { - if (rc < minvec) - return -ENOSPC; - nvec = rc; + if (!(flags & PCI_IRQ_NOMSIX)) { + vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, + flags); + if (vecs > 0) + return vecs; + } + + if (!(flags & PCI_IRQ_NOMSI)) { + vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags); + if (vecs > 0) + return vecs; + } + + /* use legacy irq if allowed */ + if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1) + return 1; + return vecs; +} +EXPORT_SYMBOL(pci_alloc_irq_vectors); + +/** + * pci_free_irq_vectors - free previously allocated IRQs for a device + * @dev: PCI device to operate on + * + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). + */ +void pci_free_irq_vectors(struct pci_dev *dev) +{ + pci_disable_msix(dev); + pci_disable_msi(dev); +} +EXPORT_SYMBOL(pci_free_irq_vectors); + +/** + * pci_irq_vector - return Linux IRQ number of a device vector + * @dev: PCI device to operate on + * @nr: device-relative interrupt vector index (0-based). + */ +int pci_irq_vector(struct pci_dev *dev, unsigned int nr) +{ + if (dev->msix_enabled) { + struct msi_desc *entry; + int i = 0; + + for_each_pci_msi_entry(entry, dev) { + if (i == nr) + return entry->irq; + i++; } - } while (rc); + WARN_ON_ONCE(1); + return -EINVAL; + } - return nvec; + if (dev->msi_enabled) { + struct msi_desc *entry = first_pci_msi_entry(dev); + + if (WARN_ON_ONCE(nr >= entry->nvec_used)) + return -EINVAL; + } else { + if (WARN_ON_ONCE(nr > 0)) + return -EINVAL; + } + + return dev->irq + nr; } -EXPORT_SYMBOL(pci_enable_msix_range); +EXPORT_SYMBOL(pci_irq_vector); struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d7ffd66814bb..e39a67c8ef39 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -777,7 +777,7 @@ static int pci_pm_suspend_noirq(struct device *dev) if (!pci_dev->state_saved) { pci_save_state(pci_dev); - if (!pci_has_subordinate(pci_dev)) + if (pci_power_manageable(pci_dev)) pci_prepare_to_sleep(pci_dev); } @@ -1144,7 +1144,6 @@ static int pci_pm_runtime_suspend(struct device *dev) return -ENOSYS; pci_dev->state_saved = false; - pci_dev->no_d3cold = false; error = pm->runtime_suspend(dev); if (error) { /* @@ -1161,8 +1160,6 @@ static int pci_pm_runtime_suspend(struct device *dev) return error; } - if (!pci_dev->d3cold_allowed) - pci_dev->no_d3cold = true; pci_fixup_device(pci_fixup_suspend, pci_dev); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d319a9ca9b7b..bcd10c795284 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -406,6 +406,11 @@ static ssize_t d3cold_allowed_store(struct device *dev, return -EINVAL; pdev->d3cold_allowed = !!val; + if (pdev->d3cold_allowed) + pci_d3cold_enable(pdev); + else + pci_d3cold_disable(pdev); + pm_runtime_resume(dev); return count; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index badbddc683f0..aab9d5115a5f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -7,8 +7,10 @@ * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz> */ +#include <linux/acpi.h> #include <linux/kernel.h> #include <linux/delay.h> +#include <linux/dmi.h> #include <linux/init.h> #include <linux/of.h> #include <linux/of_pci.h> @@ -25,7 +27,9 @@ #include <linux/device.h> #include <linux/pm_runtime.h> #include <linux/pci_hotplug.h> +#include <linux/vmalloc.h> #include <asm/setup.h> +#include <asm/dma.h> #include <linux/aer.h> #include "pci.h" @@ -81,6 +85,9 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +#define DEFAULT_HOTPLUG_BUS_SIZE 1 +unsigned long pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE; + enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_DEFAULT; /* @@ -101,6 +108,21 @@ unsigned int pcibios_max_latency = 255; /* If set, the PCIe ARI capability will not be used. */ static bool pcie_ari_disabled; +/* Disable bridge_d3 for all PCIe ports */ +static bool pci_bridge_d3_disable; +/* Force bridge_d3 for all PCIe ports */ +static bool pci_bridge_d3_force; + +static int __init pcie_port_pm_setup(char *str) +{ + if (!strcmp(str, "off")) + pci_bridge_d3_disable = true; + else if (!strcmp(str, "force")) + pci_bridge_d3_force = true; + return 1; +} +__setup("pcie_port_pm=", pcie_port_pm_setup); + /** * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children * @bus: pointer to PCI bus structure to search @@ -2156,6 +2178,164 @@ void pci_config_pm_runtime_put(struct pci_dev *pdev) } /** + * pci_bridge_d3_possible - Is it possible to put the bridge into D3 + * @bridge: Bridge to check + * + * This function checks if it is possible to move the bridge to D3. + * Currently we only allow D3 for recent enough PCIe ports. + */ +static bool pci_bridge_d3_possible(struct pci_dev *bridge) +{ + unsigned int year; + + if (!pci_is_pcie(bridge)) + return false; + + switch (pci_pcie_type(bridge)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + if (pci_bridge_d3_disable) + return false; + if (pci_bridge_d3_force) + return true; + + /* + * It should be safe to put PCIe ports from 2015 or newer + * to D3. + */ + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2015) { + return true; + } + break; + } + + return false; +} + +static int pci_dev_check_d3cold(struct pci_dev *dev, void *data) +{ + bool *d3cold_ok = data; + bool no_d3cold; + + /* + * The device needs to be allowed to go D3cold and if it is wake + * capable to do so from D3cold. + */ + no_d3cold = dev->no_d3cold || !dev->d3cold_allowed || + (device_may_wakeup(&dev->dev) && !pci_pme_capable(dev, PCI_D3cold)) || + !pci_power_manageable(dev); + + *d3cold_ok = !no_d3cold; + + return no_d3cold; +} + +/* + * pci_bridge_d3_update - Update bridge D3 capabilities + * @dev: PCI device which is changed + * @remove: Is the device being removed + * + * Update upstream bridge PM capabilities accordingly depending on if the + * device PM configuration was changed or the device is being removed. The + * change is also propagated upstream. + */ +static void pci_bridge_d3_update(struct pci_dev *dev, bool remove) +{ + struct pci_dev *bridge; + bool d3cold_ok = true; + + bridge = pci_upstream_bridge(dev); + if (!bridge || !pci_bridge_d3_possible(bridge)) + return; + + pci_dev_get(bridge); + /* + * If the device is removed we do not care about its D3cold + * capabilities. + */ + if (!remove) + pci_dev_check_d3cold(dev, &d3cold_ok); + + if (d3cold_ok) { + /* + * We need to go through all children to find out if all of + * them can still go to D3cold. + */ + pci_walk_bus(bridge->subordinate, pci_dev_check_d3cold, + &d3cold_ok); + } + + if (bridge->bridge_d3 != d3cold_ok) { + bridge->bridge_d3 = d3cold_ok; + /* Propagate change to upstream bridges */ + pci_bridge_d3_update(bridge, false); + } + + pci_dev_put(bridge); +} + +/** + * pci_bridge_d3_device_changed - Update bridge D3 capabilities on change + * @dev: PCI device that was changed + * + * If a device is added or its PM configuration, such as is it allowed to + * enter D3cold, is changed this function updates upstream bridge PM + * capabilities accordingly. + */ +void pci_bridge_d3_device_changed(struct pci_dev *dev) +{ + pci_bridge_d3_update(dev, false); +} + +/** + * pci_bridge_d3_device_removed - Update bridge D3 capabilities on remove + * @dev: PCI device being removed + * + * Function updates upstream bridge PM capabilities based on other devices + * still left on the bus. + */ +void pci_bridge_d3_device_removed(struct pci_dev *dev) +{ + pci_bridge_d3_update(dev, true); +} + +/** + * pci_d3cold_enable - Enable D3cold for device + * @dev: PCI device to handle + * + * This function can be used in drivers to enable D3cold from the device + * they handle. It also updates upstream PCI bridge PM capabilities + * accordingly. + */ +void pci_d3cold_enable(struct pci_dev *dev) +{ + if (dev->no_d3cold) { + dev->no_d3cold = false; + pci_bridge_d3_device_changed(dev); + } +} +EXPORT_SYMBOL_GPL(pci_d3cold_enable); + +/** + * pci_d3cold_disable - Disable D3cold for device + * @dev: PCI device to handle + * + * This function can be used in drivers to disable D3cold from the device + * they handle. It also updates upstream PCI bridge PM capabilities + * accordingly. + */ +void pci_d3cold_disable(struct pci_dev *dev) +{ + if (!dev->no_d3cold) { + dev->no_d3cold = true; + pci_bridge_d3_device_changed(dev); + } +} +EXPORT_SYMBOL_GPL(pci_d3cold_disable); + +/** * pci_pm_init - Initialize PM functions of given PCI device * @dev: PCI device to handle. */ @@ -2189,6 +2369,7 @@ void pci_pm_init(struct pci_dev *dev) dev->pm_cap = pm; dev->d3_delay = PCI_PM_D3_WAIT; dev->d3cold_delay = PCI_PM_D3COLD_WAIT; + dev->bridge_d3 = pci_bridge_d3_possible(dev); dev->d3cold_allowed = true; dev->d1_support = false; @@ -3165,6 +3346,23 @@ int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) #endif } +/** + * pci_unmap_iospace - Unmap the memory mapped I/O space + * @res: resource to be unmapped + * + * Unmap the CPU virtual address @res from virtual address space. + * Only architectures that have memory mapped IO functions defined + * (and the PCI_IOBASE value defined) should call this function. + */ +void pci_unmap_iospace(struct resource *res) +{ +#if defined(PCI_IOBASE) && defined(CONFIG_MMU) + unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start; + + unmap_kernel_range(vaddr, resource_size(res)); +#endif +} + static void __pci_set_master(struct pci_dev *dev, bool enable) { u16 old_cmd, cmd; @@ -4755,6 +4953,7 @@ static DEFINE_SPINLOCK(resource_alignment_lock); static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) { int seg, bus, slot, func, align_order, count; + unsigned short vendor, device, subsystem_vendor, subsystem_device; resource_size_t align = 0; char *p; @@ -4768,28 +4967,55 @@ static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) } else { align_order = -1; } - if (sscanf(p, "%x:%x:%x.%x%n", - &seg, &bus, &slot, &func, &count) != 4) { - seg = 0; - if (sscanf(p, "%x:%x.%x%n", - &bus, &slot, &func, &count) != 3) { - /* Invalid format */ - printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", - p); + if (strncmp(p, "pci:", 4) == 0) { + /* PCI vendor/device (subvendor/subdevice) ids are specified */ + p += 4; + if (sscanf(p, "%hx:%hx:%hx:%hx%n", + &vendor, &device, &subsystem_vendor, &subsystem_device, &count) != 4) { + if (sscanf(p, "%hx:%hx%n", &vendor, &device, &count) != 2) { + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: pci:%s\n", + p); + break; + } + subsystem_vendor = subsystem_device = 0; + } + p += count; + if ((!vendor || (vendor == dev->vendor)) && + (!device || (device == dev->device)) && + (!subsystem_vendor || (subsystem_vendor == dev->subsystem_vendor)) && + (!subsystem_device || (subsystem_device == dev->subsystem_device))) { + if (align_order == -1) + align = PAGE_SIZE; + else + align = 1 << align_order; + /* Found */ break; } } - p += count; - if (seg == pci_domain_nr(dev->bus) && - bus == dev->bus->number && - slot == PCI_SLOT(dev->devfn) && - func == PCI_FUNC(dev->devfn)) { - if (align_order == -1) - align = PAGE_SIZE; - else - align = 1 << align_order; - /* Found */ - break; + else { + if (sscanf(p, "%x:%x:%x.%x%n", + &seg, &bus, &slot, &func, &count) != 4) { + seg = 0; + if (sscanf(p, "%x:%x.%x%n", + &bus, &slot, &func, &count) != 3) { + /* Invalid format */ + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; + } + } + p += count; + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + if (align_order == -1) + align = PAGE_SIZE; + else + align = 1 << align_order; + /* Found */ + break; + } } if (*p != ';' && *p != ',') { /* End of param or invalid format */ @@ -4897,7 +5123,7 @@ static ssize_t pci_resource_alignment_store(struct bus_type *bus, return pci_set_resource_alignment_param(buf, count); } -BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, +static BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, pci_resource_alignment_store); static int __init pci_resource_alignment_sysfs_init(void) @@ -4923,7 +5149,7 @@ int pci_get_new_domain_nr(void) } #ifdef CONFIG_PCI_DOMAINS_GENERIC -void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent) +static int of_pci_bus_find_domain_nr(struct device *parent) { static int use_dt_domains = -1; int domain = -1; @@ -4967,7 +5193,13 @@ void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent) domain = -1; } - bus->domain_nr = domain; + return domain; +} + +int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) +{ + return acpi_disabled ? of_pci_bus_find_domain_nr(parent) : + acpi_pci_bus_find_domain_nr(bus); } #endif #endif @@ -5021,6 +5253,11 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "hpbussize=", 10)) { + pci_hotplug_bus_size = + simple_strtoul(str + 10, &str, 0); + if (pci_hotplug_bus_size > 0xff) + pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE; } else if (!strncmp(str, "pcie_bus_tune_off", 17)) { pcie_bus_config = PCIE_BUS_TUNE_OFF; } else if (!strncmp(str, "pcie_bus_safe", 13)) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a814bbb80fcb..9730c474b016 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -82,6 +82,8 @@ void pci_pm_init(struct pci_dev *dev); void pci_ea_init(struct pci_dev *dev); void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); +void pci_bridge_d3_device_changed(struct pci_dev *dev); +void pci_bridge_d3_device_removed(struct pci_dev *dev); static inline void pci_wakeup_event(struct pci_dev *dev) { @@ -94,6 +96,15 @@ static inline bool pci_has_subordinate(struct pci_dev *pci_dev) return !!(pci_dev->subordinate); } +static inline bool pci_power_manageable(struct pci_dev *pci_dev) +{ + /* + * Currently we allow normal PCI devices and PCI bridges transition + * into D3 if their bridge_d3 is set. + */ + return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3; +} + struct pci_vpd_ops { ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 22ca6412bd15..7fcea75afa4c 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -83,7 +83,7 @@ config PCIE_PME depends on PCIEPORTBUS && PM config PCIE_DPC - tristate "PCIe Downstream Port Containment support" + bool "PCIe Downstream Port Containment support" depends on PCIEPORTBUS default n help @@ -92,6 +92,3 @@ config PCIE_DPC will be handled by the DPC driver. If your system doesn't have this capability or you do not want to use this feature, it is safe to answer N. - - To compile this driver as a module, choose M here: the module - will be called pcie-dpc. diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 2dfe7fdb77e7..0ec649d961d7 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -139,7 +139,7 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) static void pcie_set_clkpm(struct pcie_link_state *link, int enable) { /* Don't enable Clock PM if the link is not Clock PM capable */ - if (!link->clkpm_capable && enable) + if (!link->clkpm_capable) enable = 0; /* Need nothing if the specified equals to current state */ if (link->clkpm_enabled == enable) diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c index ab552f1bc08f..250f87861786 100644 --- a/drivers/pci/pcie/pcie-dpc.c +++ b/drivers/pci/pcie/pcie-dpc.c @@ -15,8 +15,8 @@ struct dpc_dev { struct pcie_device *dev; - struct work_struct work; - int cap_pos; + struct work_struct work; + int cap_pos; }; static void dpc_wait_link_inactive(struct pci_dev *pdev) @@ -89,7 +89,7 @@ static int dpc_probe(struct pcie_device *dev) int status; u16 ctl, cap; - dpc = kzalloc(sizeof(*dpc), GFP_KERNEL); + dpc = devm_kzalloc(&dev->device, sizeof(*dpc), GFP_KERNEL); if (!dpc) return -ENOMEM; @@ -98,11 +98,12 @@ static int dpc_probe(struct pcie_device *dev) INIT_WORK(&dpc->work, interrupt_event_handler); set_service_data(dev, dpc); - status = request_irq(dev->irq, dpc_irq, IRQF_SHARED, "pcie-dpc", dpc); + status = devm_request_irq(&dev->device, dev->irq, dpc_irq, IRQF_SHARED, + "pcie-dpc", dpc); if (status) { dev_warn(&dev->device, "request IRQ%d failed: %d\n", dev->irq, status); - goto out; + return status; } pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); @@ -117,9 +118,6 @@ static int dpc_probe(struct pcie_device *dev) FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), (cap >> 8) & 0xf, FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE)); return status; - out: - kfree(dpc); - return status; } static void dpc_remove(struct pcie_device *dev) @@ -131,14 +129,11 @@ static void dpc_remove(struct pcie_device *dev) pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN); pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); - - free_irq(dev->irq, dpc); - kfree(dpc); } static struct pcie_port_service_driver dpcdriver = { .name = "dpc", - .port_type = PCI_EXP_TYPE_ROOT_PORT | PCI_EXP_TYPE_DOWNSTREAM, + .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 32d4d0a3d20e..e9270b4026f3 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/pcieport_if.h> @@ -342,6 +343,8 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq) return retval; } + pm_runtime_no_callbacks(device); + return 0; } diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index be35da2e105e..70d7ad8c6d17 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -93,6 +93,26 @@ static int pcie_port_resume_noirq(struct device *dev) return 0; } +static int pcie_port_runtime_suspend(struct device *dev) +{ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static int pcie_port_runtime_resume(struct device *dev) +{ + return 0; +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + static const struct dev_pm_ops pcie_portdrv_pm_ops = { .suspend = pcie_port_device_suspend, .resume = pcie_port_device_resume, @@ -101,6 +121,9 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { .poweroff = pcie_port_device_suspend, .restore = pcie_port_device_resume, .resume_noirq = pcie_port_resume_noirq, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, }; #define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) @@ -134,16 +157,39 @@ static int pcie_portdrv_probe(struct pci_dev *dev, return status; pci_save_state(dev); + /* - * D3cold may not work properly on some PCIe port, so disable - * it by default. + * Prevent runtime PM if the port is advertising support for PCIe + * hotplug. Otherwise the BIOS hotplug SMI code might not be able + * to enumerate devices behind this port properly (the port is + * powered down preventing all config space accesses to the + * subordinate devices). We can't be sure for native PCIe hotplug + * either so prevent that as well. */ - dev->d3cold_allowed = false; + if (!dev->is_hotplug_bridge) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + return 0; } static void pcie_portdrv_remove(struct pci_dev *dev) { + if (!dev->is_hotplug_bridge) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + pcie_port_device_remove(dev); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8e3ef720997d..93f280df3428 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -16,6 +16,7 @@ #include <linux/aer.h> #include <linux/acpi.h> #include <linux/irqdomain.h> +#include <linux/pm_runtime.h> #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -832,6 +833,12 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) u8 primary, secondary, subordinate; int broken = 0; + /* + * Make sure the bridge is powered on to be able to access config + * space of devices below it. + */ + pm_runtime_get_sync(&dev->dev); + pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); primary = buses & 0xFF; secondary = (buses >> 8) & 0xFF; @@ -1012,6 +1019,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) out: pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl); + pm_runtime_put(&dev->dev); + return max; } EXPORT_SYMBOL(pci_scan_bridge); @@ -2077,6 +2086,15 @@ unsigned int pci_scan_child_bus(struct pci_bus *bus) } /* + * Make sure a hotplug bridge has at least the minimum requested + * number of buses. + */ + if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) { + if (max - bus->busn_res.start < pci_hotplug_bus_size - 1) + max = bus->busn_res.start + pci_hotplug_bus_size - 1; + } + + /* * We've scanned the bus and so we know all about what's on * the other side of any bridges that may be on this bus plus * any devices. @@ -2127,7 +2145,9 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, b->sysdata = sysdata; b->ops = ops; b->number = b->busn_res.start = bus; - pci_bus_assign_domain_nr(b, parent); +#ifdef CONFIG_PCI_DOMAINS_GENERIC + b->domain_nr = pci_bus_find_domain_nr(b, parent); +#endif b2 = pci_find_bus(pci_domain_nr(b), bus); if (b2) { /* If we already got to this bus through a different bridge, ignore it */ diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 3f155e78513f..2408abe4ee8c 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -231,7 +231,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) { struct pci_dev *dev = PDE_DATA(file_inode(file)); struct pci_filp_private *fpriv = file->private_data; - int i, ret; + int i, ret, write_combine; if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -245,9 +245,12 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) if (i >= PCI_ROM_RESOURCE) return -ENODEV; + if (fpriv->mmap_state == pci_mmap_mem) + write_combine = fpriv->write_combine; + else + write_combine = 0; ret = pci_mmap_page_range(dev, vma, - fpriv->mmap_state, - fpriv->write_combine); + fpriv->mmap_state, write_combine); if (ret < 0) return ret; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ee72ebe18f4b..37ff0158e45f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3189,13 +3189,15 @@ static void quirk_no_bus_reset(struct pci_dev *dev) } /* - * Atheros AR93xx chips do not behave after a bus reset. The device will - * throw a Link Down error on AER-capable systems and regardless of AER, - * config space of the device is never accessible again and typically - * causes the system to hang or reset when access is attempted. + * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. + * The device will throw a Link Down error on AER-capable systems and + * regardless of AER, config space of the device is never accessible again + * and typically causes the system to hang or reset when access is attempted. * http://www.spinics.net/lists/linux-pci/msg34797.html */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0030, quirk_no_bus_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset); static void quirk_no_pm_reset(struct pci_dev *dev) { @@ -3711,6 +3713,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c59 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x917a, quirk_dma_func1_alias); +/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c78 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9182, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0, quirk_dma_func1_alias); @@ -3747,6 +3752,9 @@ static const struct pci_device_id fixed_dma_alias_tbl[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x0285, PCI_VENDOR_ID_ADAPTEC2, 0x02bb), /* Adaptec 3405 */ .driver_data = PCI_DEVFN(1, 0) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x0285, + PCI_VENDOR_ID_ADAPTEC2, 0x02bc), /* Adaptec 3805 */ + .driver_data = PCI_DEVFN(1, 0) }, { 0 } }; @@ -4087,6 +4095,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_AMD, 0x7809, pci_quirk_amd_sb_acs }, { PCI_VENDOR_ID_SOLARFLARE, 0x0903, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_SOLARFLARE, 0x0923, pci_quirk_mf_endpoint_acs }, + { PCI_VENDOR_ID_SOLARFLARE, 0x0A03, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_INTEL, 0x10C6, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_INTEL, 0x10DB, pci_quirk_mf_endpoint_acs }, { PCI_VENDOR_ID_INTEL, 0x10DD, pci_quirk_mf_endpoint_acs }, diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 8982026637d5..d1ef7acf6930 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -96,6 +96,8 @@ static void pci_remove_bus_device(struct pci_dev *dev) dev->subordinate = NULL; } + pci_bridge_d3_device_removed(dev); + pci_destroy_dev(dev); } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index d678c46e5f03..c74059e10a6d 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1428,6 +1428,74 @@ void pci_bus_assign_resources(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_assign_resources); +static void pci_claim_device_resources(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (!r->flags || r->parent) + continue; + + pci_claim_resource(dev, i); + } +} + +static void pci_claim_bridge_resources(struct pci_dev *dev) +{ + int i; + + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (!r->flags || r->parent) + continue; + + pci_claim_bridge_resource(dev, i); + } +} + +static void pci_bus_allocate_dev_resources(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child; + + list_for_each_entry(dev, &b->devices, bus_list) { + pci_claim_device_resources(dev); + + child = dev->subordinate; + if (child) + pci_bus_allocate_dev_resources(child); + } +} + +static void pci_bus_allocate_resources(struct pci_bus *b) +{ + struct pci_bus *child; + + /* + * Carry out a depth-first search on the PCI bus + * tree to allocate bridge apertures. Read the + * programmed bridge bases and recursively claim + * the respective bridge resources. + */ + if (b->self) { + pci_read_bridge_bases(b); + pci_claim_bridge_resources(b->self); + } + + list_for_each_entry(child, &b->children, node) + pci_bus_allocate_resources(child); +} + +void pci_bus_claim_resources(struct pci_bus *b) +{ + pci_bus_allocate_resources(b); + pci_bus_allocate_dev_resources(b); +} +EXPORT_SYMBOL(pci_bus_claim_resources); + static void __pci_bridge_assign_resources(const struct pci_dev *bridge, struct list_head *add_head, struct list_head *fail_head) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 5f70fee59a94..d6ff5e82377d 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1086,7 +1086,7 @@ out: return err; } -static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, +static void __ref pcifront_backend_changed(struct xenbus_device *xdev, enum xenbus_state be_state) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index b6e161f71b26..6c084b266651 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -380,3 +380,20 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, return ret; } EXPORT_SYMBOL(cros_ec_cmd_xfer); + +int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + int ret; + + ret = cros_ec_cmd_xfer(ec_dev, msg); + if (ret < 0) { + dev_err(ec_dev->dev, "Command xfer error (err:%d)\n", ret); + } else if (msg->result != EC_RES_SUCCESS) { + dev_dbg(ec_dev->dev, "Command result (err: %d)\n", msg->result); + return -EPROTO; + } + + return ret; +} +EXPORT_SYMBOL(cros_ec_cmd_xfer_status); diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index bedb361746a0..c38a5b9733c8 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -60,6 +60,7 @@ #include <linux/delay.h> #include <linux/acpi.h> #include <linux/freezer.h> +#include <linux/kmod.h> #include <linux/kthread.h> #include <asm/page.h> diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index c182efc62c7b..80a566a00d04 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -74,6 +74,16 @@ config PWM_ATMEL_TCB To compile this driver as a module, choose M here: the module will be called pwm-atmel-tcb. +config PWM_BCM_IPROC + tristate "iProc PWM support" + depends on ARCH_BCM_IPROC + help + Generic PWM framework driver for Broadcom iProc PWM block. This + block is used in Broadcom iProc SoC's. + + To compile this driver as a module, choose M here: the module + will be called pwm-bcm-iproc. + config PWM_BCM_KONA tristate "Kona PWM support" depends on ARCH_BCM_MOBILE @@ -137,6 +147,13 @@ config PWM_CRC Generic PWM framework driver for Crystalcove (CRC) PMIC based PWM control. +config PWM_CROS_EC + tristate "ChromeOS EC PWM driver" + depends on MFD_CROS_EC + help + PWM driver for exposing a PWM attached to the ChromeOS Embedded + Controller. + config PWM_EP93XX tristate "Cirrus Logic EP93xx PWM support" depends on ARCH_EP93XX @@ -305,7 +322,7 @@ config PWM_PXA config PWM_RCAR tristate "Renesas R-Car PWM support" - depends on ARCH_RCAR_GEN1 || ARCH_RCAR_GEN2 || COMPILE_TEST + depends on ARCH_RENESAS || COMPILE_TEST depends on HAS_IOMEM help This driver exposes the PWM Timer controller found in Renesas @@ -362,6 +379,13 @@ config PWM_STI To compile this driver as a module, choose M here: the module will be called pwm-sti. +config PWM_STMPE + bool "STMPE expander PWM export" + depends on MFD_STMPE + help + This enables support for the PWMs found in the STMPE I/O + expanders. + config PWM_SUN4I tristate "Allwinner PWM support" depends on ARCH_SUNXI || COMPILE_TEST diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index dd35bc121a18..feef1dd29f73 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PWM_AB8500) += pwm-ab8500.o obj-$(CONFIG_PWM_ATMEL) += pwm-atmel.o obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o +obj-$(CONFIG_PWM_BCM_IPROC) += pwm-bcm-iproc.o obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o obj-$(CONFIG_PWM_BERLIN) += pwm-berlin.o @@ -11,6 +12,7 @@ obj-$(CONFIG_PWM_BFIN) += pwm-bfin.o obj-$(CONFIG_PWM_BRCMSTB) += pwm-brcmstb.o obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o obj-$(CONFIG_PWM_CRC) += pwm-crc.o +obj-$(CONFIG_PWM_CROS_EC) += pwm-cros-ec.o obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o obj-$(CONFIG_PWM_FSL_FTM) += pwm-fsl-ftm.o obj-$(CONFIG_PWM_IMG) += pwm-img.o @@ -34,6 +36,7 @@ obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o obj-$(CONFIG_PWM_STI) += pwm-sti.o +obj-$(CONFIG_PWM_STMPE) += pwm-stmpe.o obj-$(CONFIG_PWM_SUN4I) += pwm-sun4i.o obj-$(CONFIG_PWM_TEGRA) += pwm-tegra.o obj-$(CONFIG_PWM_TIECAP) += pwm-tiecap.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index ed337a8c34ab..0dbd29e287db 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -526,6 +526,33 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) EXPORT_SYMBOL_GPL(pwm_apply_state); /** + * pwm_capture() - capture and report a PWM signal + * @pwm: PWM device + * @result: structure to fill with capture result + * @timeout: time to wait, in milliseconds, before giving up on capture + * + * Returns: 0 on success or a negative error code on failure. + */ +int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, + unsigned long timeout) +{ + int err; + + if (!pwm || !pwm->chip->ops) + return -EINVAL; + + if (!pwm->chip->ops->capture) + return -ENOSYS; + + mutex_lock(&pwm_lock); + err = pwm->chip->ops->capture(pwm->chip, pwm, result, timeout); + mutex_unlock(&pwm_lock); + + return err; +} +EXPORT_SYMBOL_GPL(pwm_capture); + +/** * pwm_adjust_config() - adjust the current PWM config to the PWM arguments * @pwm: PWM device * diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 0e4bd4e8e582..e6b8b1b7e6ba 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -64,7 +64,8 @@ struct atmel_pwm_chip { void __iomem *base; unsigned int updated_pwms; - struct mutex isr_lock; /* ISR is cleared when read, ensure only one thread does that */ + /* ISR is cleared when read, ensure only one thread does that */ + struct mutex isr_lock; void (*config)(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long dty, unsigned long prd); @@ -271,6 +272,16 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) mutex_unlock(&atmel_pwm->isr_lock); atmel_pwm_writel(atmel_pwm, PWM_DIS, 1 << pwm->hwpwm); + /* + * Wait for the PWM channel disable operation to be effective before + * stopping the clock. + */ + timeout = jiffies + 2 * HZ; + + while ((atmel_pwm_readl(atmel_pwm, PWM_SR) & (1 << pwm->hwpwm)) && + time_before(jiffies, timeout)) + usleep_range(10, 100); + clk_disable(atmel_pwm->clk); } @@ -324,21 +335,14 @@ MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); static inline const struct atmel_pwm_data * atmel_pwm_get_driver_data(struct platform_device *pdev) { - if (pdev->dev.of_node) { - const struct of_device_id *match; - - match = of_match_device(atmel_pwm_dt_ids, &pdev->dev); - if (!match) - return NULL; + const struct platform_device_id *id; - return match->data; - } else { - const struct platform_device_id *id; + if (pdev->dev.of_node) + return of_device_get_match_data(&pdev->dev); - id = platform_get_device_id(pdev); + id = platform_get_device_id(pdev); - return (struct atmel_pwm_data *)id->driver_data; - } + return (struct atmel_pwm_data *)id->driver_data; } static int atmel_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c new file mode 100644 index 000000000000..d961a8207b1c --- /dev/null +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2016 Broadcom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> + +#define IPROC_PWM_CTRL_OFFSET 0x00 +#define IPROC_PWM_CTRL_TYPE_SHIFT(ch) (15 + (ch)) +#define IPROC_PWM_CTRL_POLARITY_SHIFT(ch) (8 + (ch)) +#define IPROC_PWM_CTRL_EN_SHIFT(ch) (ch) + +#define IPROC_PWM_PERIOD_OFFSET(ch) (0x04 + ((ch) << 3)) +#define IPROC_PWM_PERIOD_MIN 0x02 +#define IPROC_PWM_PERIOD_MAX 0xffff + +#define IPROC_PWM_DUTY_CYCLE_OFFSET(ch) (0x08 + ((ch) << 3)) +#define IPROC_PWM_DUTY_CYCLE_MIN 0x00 +#define IPROC_PWM_DUTY_CYCLE_MAX 0xffff + +#define IPROC_PWM_PRESCALE_OFFSET 0x24 +#define IPROC_PWM_PRESCALE_BITS 0x06 +#define IPROC_PWM_PRESCALE_SHIFT(ch) ((3 - (ch)) * \ + IPROC_PWM_PRESCALE_BITS) +#define IPROC_PWM_PRESCALE_MASK(ch) (IPROC_PWM_PRESCALE_MAX << \ + IPROC_PWM_PRESCALE_SHIFT(ch)) +#define IPROC_PWM_PRESCALE_MIN 0x00 +#define IPROC_PWM_PRESCALE_MAX 0x3f + +struct iproc_pwmc { + struct pwm_chip chip; + void __iomem *base; + struct clk *clk; +}; + +static inline struct iproc_pwmc *to_iproc_pwmc(struct pwm_chip *chip) +{ + return container_of(chip, struct iproc_pwmc, chip); +} + +static void iproc_pwmc_enable(struct iproc_pwmc *ip, unsigned int channel) +{ + u32 value; + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + value |= 1 << IPROC_PWM_CTRL_EN_SHIFT(channel); + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + /* must be a 400 ns delay between clearing and setting enable bit */ + ndelay(400); +} + +static void iproc_pwmc_disable(struct iproc_pwmc *ip, unsigned int channel) +{ + u32 value; + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + value &= ~(1 << IPROC_PWM_CTRL_EN_SHIFT(channel)); + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + /* must be a 400 ns delay between clearing and setting enable bit */ + ndelay(400); +} + +static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct iproc_pwmc *ip = to_iproc_pwmc(chip); + u64 tmp, multi, rate; + u32 value, prescale; + + rate = clk_get_rate(ip->clk); + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + if (value & BIT(IPROC_PWM_CTRL_EN_SHIFT(pwm->hwpwm))) + state->enabled = true; + else + state->enabled = false; + + if (value & BIT(IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm))) + state->polarity = PWM_POLARITY_NORMAL; + else + state->polarity = PWM_POLARITY_INVERSED; + + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); + prescale = value >> IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); + prescale &= IPROC_PWM_PRESCALE_MAX; + + multi = NSEC_PER_SEC * (prescale + 1); + + value = readl(ip->base + IPROC_PWM_PERIOD_OFFSET(pwm->hwpwm)); + tmp = (value & IPROC_PWM_PERIOD_MAX) * multi; + state->period = div64_u64(tmp, rate); + + value = readl(ip->base + IPROC_PWM_DUTY_CYCLE_OFFSET(pwm->hwpwm)); + tmp = (value & IPROC_PWM_PERIOD_MAX) * multi; + state->duty_cycle = div64_u64(tmp, rate); +} + +static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + unsigned long prescale = IPROC_PWM_PRESCALE_MIN; + struct iproc_pwmc *ip = to_iproc_pwmc(chip); + u32 value, period, duty; + u64 rate; + + rate = clk_get_rate(ip->clk); + + /* + * Find period count, duty count and prescale to suit duty_cycle and + * period. This is done according to formulas described below: + * + * period_ns = 10^9 * (PRESCALE + 1) * PC / PWM_CLK_RATE + * duty_ns = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE + * + * PC = (PWM_CLK_RATE * period_ns) / (10^9 * (PRESCALE + 1)) + * DC = (PWM_CLK_RATE * duty_ns) / (10^9 * (PRESCALE + 1)) + */ + while (1) { + u64 value, div; + + div = NSEC_PER_SEC * (prescale + 1); + value = rate * state->period; + period = div64_u64(value, div); + value = rate * state->duty_cycle; + duty = div64_u64(value, div); + + if (period < IPROC_PWM_PERIOD_MIN || + duty < IPROC_PWM_DUTY_CYCLE_MIN) + return -EINVAL; + + if (period <= IPROC_PWM_PERIOD_MAX && + duty <= IPROC_PWM_DUTY_CYCLE_MAX) + break; + + /* Otherwise, increase prescale and recalculate counts */ + if (++prescale > IPROC_PWM_PRESCALE_MAX) + return -EINVAL; + } + + iproc_pwmc_disable(ip, pwm->hwpwm); + + /* Set prescale */ + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); + value &= ~IPROC_PWM_PRESCALE_MASK(pwm->hwpwm); + value |= prescale << IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); + writel(value, ip->base + IPROC_PWM_PRESCALE_OFFSET); + + /* set period and duty cycle */ + writel(period, ip->base + IPROC_PWM_PERIOD_OFFSET(pwm->hwpwm)); + writel(duty, ip->base + IPROC_PWM_DUTY_CYCLE_OFFSET(pwm->hwpwm)); + + /* set polarity */ + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + if (state->polarity == PWM_POLARITY_NORMAL) + value |= 1 << IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm); + else + value &= ~(1 << IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm)); + + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + if (state->enabled) + iproc_pwmc_enable(ip, pwm->hwpwm); + + return 0; +} + +static const struct pwm_ops iproc_pwm_ops = { + .apply = iproc_pwmc_apply, + .get_state = iproc_pwmc_get_state, +}; + +static int iproc_pwmc_probe(struct platform_device *pdev) +{ + struct iproc_pwmc *ip; + struct resource *res; + unsigned int i; + u32 value; + int ret; + + ip = devm_kzalloc(&pdev->dev, sizeof(*ip), GFP_KERNEL); + if (!ip) + return -ENOMEM; + + platform_set_drvdata(pdev, ip); + + ip->chip.dev = &pdev->dev; + ip->chip.ops = &iproc_pwm_ops; + ip->chip.base = -1; + ip->chip.npwm = 4; + ip->chip.of_xlate = of_pwm_xlate_with_flags; + ip->chip.of_pwm_n_cells = 3; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ip->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ip->base)) + return PTR_ERR(ip->base); + + ip->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ip->clk)) { + dev_err(&pdev->dev, "failed to get clock: %ld\n", + PTR_ERR(ip->clk)); + return PTR_ERR(ip->clk); + } + + ret = clk_prepare_enable(ip->clk); + if (ret < 0) { + dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); + return ret; + } + + /* Set full drive and normal polarity for all channels */ + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + for (i = 0; i < ip->chip.npwm; i++) { + value &= ~(1 << IPROC_PWM_CTRL_TYPE_SHIFT(i)); + value |= 1 << IPROC_PWM_CTRL_POLARITY_SHIFT(i); + } + + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + ret = pwmchip_add(&ip->chip); + if (ret < 0) { + dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); + clk_disable_unprepare(ip->clk); + } + + return ret; +} + +static int iproc_pwmc_remove(struct platform_device *pdev) +{ + struct iproc_pwmc *ip = platform_get_drvdata(pdev); + + clk_disable_unprepare(ip->clk); + + return pwmchip_remove(&ip->chip); +} + +static const struct of_device_id bcm_iproc_pwmc_dt[] = { + { .compatible = "brcm,iproc-pwm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, bcm_iproc_pwmc_dt); + +static struct platform_driver iproc_pwmc_driver = { + .driver = { + .name = "bcm-iproc-pwm", + .of_match_table = bcm_iproc_pwmc_dt, + }, + .probe = iproc_pwmc_probe, + .remove = iproc_pwmc_remove, +}; +module_platform_driver(iproc_pwmc_driver); + +MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com>"); +MODULE_DESCRIPTION("Broadcom iProc PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c new file mode 100644 index 000000000000..99b9acc1a420 --- /dev/null +++ b/drivers/pwm/pwm-cros-ec.c @@ -0,0 +1,260 @@ +/* + * Copyright (C) 2016 Google, Inc + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2, as published by + * the Free Software Foundation. + * + * Expose a PWM controlled by the ChromeOS EC to the host processor. + */ + +#include <linux/module.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +/** + * struct cros_ec_pwm_device - Driver data for EC PWM + * + * @dev: Device node + * @ec: Pointer to EC device + * @chip: PWM controller chip + */ +struct cros_ec_pwm_device { + struct device *dev; + struct cros_ec_device *ec; + struct pwm_chip chip; +}; + +static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *c) +{ + return container_of(c, struct cros_ec_pwm_device, chip); +} + +static int cros_ec_pwm_set_duty(struct cros_ec_device *ec, u8 index, u16 duty) +{ + struct { + struct cros_ec_command msg; + struct ec_params_pwm_set_duty params; + } buf; + struct ec_params_pwm_set_duty *params = &buf.params; + struct cros_ec_command *msg = &buf.msg; + + memset(&buf, 0, sizeof(buf)); + + msg->version = 0; + msg->command = EC_CMD_PWM_SET_DUTY; + msg->insize = 0; + msg->outsize = sizeof(*params); + + params->duty = duty; + params->pwm_type = EC_PWM_TYPE_GENERIC; + params->index = index; + + return cros_ec_cmd_xfer_status(ec, msg); +} + +static int __cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index, + u32 *result) +{ + struct { + struct cros_ec_command msg; + union { + struct ec_params_pwm_get_duty params; + struct ec_response_pwm_get_duty resp; + }; + } buf; + struct ec_params_pwm_get_duty *params = &buf.params; + struct ec_response_pwm_get_duty *resp = &buf.resp; + struct cros_ec_command *msg = &buf.msg; + int ret; + + memset(&buf, 0, sizeof(buf)); + + msg->version = 0; + msg->command = EC_CMD_PWM_GET_DUTY; + msg->insize = sizeof(*params); + msg->outsize = sizeof(*resp); + + params->pwm_type = EC_PWM_TYPE_GENERIC; + params->index = index; + + ret = cros_ec_cmd_xfer_status(ec, msg); + if (result) + *result = msg->result; + if (ret < 0) + return ret; + + return resp->duty; +} + +static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index) +{ + return __cros_ec_pwm_get_duty(ec, index, NULL); +} + +static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); + int duty_cycle; + + /* The EC won't let us change the period */ + if (state->period != EC_PWM_MAX_DUTY) + return -EINVAL; + + /* + * EC doesn't separate the concept of duty cycle and enabled, but + * kernel does. Translate. + */ + duty_cycle = state->enabled ? state->duty_cycle : 0; + + return cros_ec_pwm_set_duty(ec_pwm->ec, pwm->hwpwm, duty_cycle); +} + +static void cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); + int ret; + + ret = cros_ec_pwm_get_duty(ec_pwm->ec, pwm->hwpwm); + if (ret < 0) { + dev_err(chip->dev, "error getting initial duty: %d\n", ret); + return; + } + + state->enabled = (ret > 0); + state->period = EC_PWM_MAX_DUTY; + + /* Note that "disabled" and "duty cycle == 0" are treated the same */ + state->duty_cycle = ret; +} + +static struct pwm_device * +cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) +{ + struct pwm_device *pwm; + + if (args->args[0] >= pc->npwm) + return ERR_PTR(-EINVAL); + + pwm = pwm_request_from_chip(pc, args->args[0], NULL); + if (IS_ERR(pwm)) + return pwm; + + /* The EC won't let us change the period */ + pwm->args.period = EC_PWM_MAX_DUTY; + + return pwm; +} + +static const struct pwm_ops cros_ec_pwm_ops = { + .get_state = cros_ec_pwm_get_state, + .apply = cros_ec_pwm_apply, + .owner = THIS_MODULE, +}; + +static int cros_ec_num_pwms(struct cros_ec_device *ec) +{ + int i, ret; + + /* The index field is only 8 bits */ + for (i = 0; i <= U8_MAX; i++) { + u32 result = 0; + + ret = __cros_ec_pwm_get_duty(ec, i, &result); + /* We want to parse EC protocol errors */ + if (ret < 0 && !(ret == -EPROTO && result)) + return ret; + + /* + * We look for SUCCESS, INVALID_COMMAND, or INVALID_PARAM + * responses; everything else is treated as an error. + */ + if (result == EC_RES_INVALID_COMMAND) + return -ENODEV; + else if (result == EC_RES_INVALID_PARAM) + return i; + else if (result) + return -EPROTO; + } + + return U8_MAX; +} + +static int cros_ec_pwm_probe(struct platform_device *pdev) +{ + struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct cros_ec_pwm_device *ec_pwm; + struct pwm_chip *chip; + int ret; + + if (!ec) { + dev_err(dev, "no parent EC device\n"); + return -EINVAL; + } + + ec_pwm = devm_kzalloc(dev, sizeof(*ec_pwm), GFP_KERNEL); + if (!ec_pwm) + return -ENOMEM; + chip = &ec_pwm->chip; + ec_pwm->ec = ec; + + /* PWM chip */ + chip->dev = dev; + chip->ops = &cros_ec_pwm_ops; + chip->of_xlate = cros_ec_pwm_xlate; + chip->of_pwm_n_cells = 1; + chip->base = -1; + ret = cros_ec_num_pwms(ec); + if (ret < 0) { + dev_err(dev, "Couldn't find PWMs: %d\n", ret); + return ret; + } + chip->npwm = ret; + dev_dbg(dev, "Probed %u PWMs\n", chip->npwm); + + ret = pwmchip_add(chip); + if (ret < 0) { + dev_err(dev, "cannot register PWM: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, ec_pwm); + + return ret; +} + +static int cros_ec_pwm_remove(struct platform_device *dev) +{ + struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev); + struct pwm_chip *chip = &ec_pwm->chip; + + return pwmchip_remove(chip); +} + +#ifdef CONFIG_OF +static const struct of_device_id cros_ec_pwm_of_match[] = { + { .compatible = "google,cros-ec-pwm" }, + {}, +}; +MODULE_DEVICE_TABLE(of, cros_ec_pwm_of_match); +#endif + +static struct platform_driver cros_ec_pwm_driver = { + .probe = cros_ec_pwm_probe, + .remove = cros_ec_pwm_remove, + .driver = { + .name = "cros-ec-pwm", + .of_match_table = of_match_ptr(cros_ec_pwm_of_match), + }, +}; +module_platform_driver(cros_ec_pwm_driver); + +MODULE_ALIAS("platform:cros-ec-pwm"); +MODULE_DESCRIPTION("ChromeOS EC PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 4d470c1a406a..a9b3cff96aac 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -25,6 +25,7 @@ struct lpc32xx_pwm_chip { }; #define PWM_ENABLE BIT(31) +#define PWM_PIN_LEVEL BIT(30) #define to_lpc32xx_pwm_chip(_chip) \ container_of(_chip, struct lpc32xx_pwm_chip, chip) @@ -103,6 +104,7 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) struct lpc32xx_pwm_chip *lpc32xx; struct resource *res; int ret; + u32 val; lpc32xx = devm_kzalloc(&pdev->dev, sizeof(*lpc32xx), GFP_KERNEL); if (!lpc32xx) @@ -128,6 +130,11 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) return ret; } + /* When PWM is disable, configure the output to the default value */ + val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + val &= ~PWM_PIN_LEVEL; + writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + platform_set_drvdata(pdev, lpc32xx); return 0; diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 7160e8ab38a4..3622f093490e 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -76,6 +76,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, + { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 295b963dbddb..72c0bce5a75c 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -27,7 +27,6 @@ #define PWM_SW_UPDATE BIT(30) #define PWM_BASE_UNIT_SHIFT 8 #define PWM_ON_TIME_DIV_MASK 0x000000ff -#define PWM_DIVISION_CORRECTION 0x2 /* Size of each PWM register space if multiple */ #define PWM_SIZE 0x400 @@ -92,8 +91,8 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct pwm_lpss_chip *lpwm = to_lpwm(chip); - u8 on_time_div; - unsigned long c, base_unit_range; + unsigned long long on_time_div; + unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; u32 ctrl; @@ -101,21 +100,18 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, /* * The equation is: - * base_unit = ((freq / c) * base_unit_range) + correction + * base_unit = round(base_unit_range * freq / c) */ base_unit_range = BIT(lpwm->info->base_unit_bits); - base_unit = freq * base_unit_range; + freq *= base_unit_range; - c = lpwm->info->clk_rate; - if (!c) - return -EINVAL; - - do_div(base_unit, c); - base_unit += PWM_DIVISION_CORRECTION; + base_unit = DIV_ROUND_CLOSEST_ULL(freq, c); if (duty_ns <= 0) duty_ns = 1; - on_time_div = 255 - (255 * duty_ns / period_ns); + on_time_div = 255ULL * duty_ns; + do_div(on_time_div, period_ns); + on_time_div = 255ULL - on_time_div; pm_runtime_get_sync(chip->dev); @@ -169,6 +165,7 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, const struct pwm_lpss_boardinfo *info) { struct pwm_lpss_chip *lpwm; + unsigned long c; int ret; lpwm = devm_kzalloc(dev, sizeof(*lpwm), GFP_KERNEL); @@ -180,6 +177,11 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, return ERR_CAST(lpwm->regs); lpwm->info = info; + + c = lpwm->info->clk_rate; + if (!c) + return ERR_PTR(-EINVAL); + lpwm->chip.dev = dev; lpwm->chip.ops = &pwm_lpss_ops; lpwm->chip.base = -1; diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 7d9cc9049522..ef89df1f7336 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -47,10 +47,14 @@ struct rockchip_pwm_regs { struct rockchip_pwm_data { struct rockchip_pwm_regs regs; unsigned int prescaler; + bool supports_polarity; const struct pwm_ops *ops; void (*set_enable)(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable); + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity); + void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state); }; static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) @@ -59,7 +63,8 @@ static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) } static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable) + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN; @@ -75,15 +80,29 @@ static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, writel_relaxed(val, pc->base + pc->data->regs.ctrl); } +static void rockchip_pwm_get_state_v1(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN; + u32 val; + + val = readl_relaxed(pc->base + pc->data->regs.ctrl); + if ((val & enable_conf) == enable_conf) + state->enabled = true; +} + static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable) + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE | PWM_CONTINUOUS; u32 val; - if (pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED) + if (polarity == PWM_POLARITY_INVERSED) enable_conf |= PWM_DUTY_NEGATIVE | PWM_INACTIVE_POSITIVE; else enable_conf |= PWM_DUTY_POSITIVE | PWM_INACTIVE_NEGATIVE; @@ -98,13 +117,59 @@ static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, writel_relaxed(val, pc->base + pc->data->regs.ctrl); } +static void rockchip_pwm_get_state_v2(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE | + PWM_CONTINUOUS; + u32 val; + + val = readl_relaxed(pc->base + pc->data->regs.ctrl); + if ((val & enable_conf) != enable_conf) + return; + + state->enabled = true; + + if (!(val & PWM_DUTY_POSITIVE)) + state->polarity = PWM_POLARITY_INVERSED; +} + +static void rockchip_pwm_get_state(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + unsigned long clk_rate; + u64 tmp; + int ret; + + ret = clk_enable(pc->clk); + if (ret) + return; + + clk_rate = clk_get_rate(pc->clk); + + tmp = readl_relaxed(pc->base + pc->data->regs.period); + tmp *= pc->data->prescaler * NSEC_PER_SEC; + state->period = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate); + + tmp = readl_relaxed(pc->base + pc->data->regs.duty); + tmp *= pc->data->prescaler * NSEC_PER_SEC; + state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate); + + pc->data->get_state(chip, pwm, state); + + clk_disable(pc->clk); +} + static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); unsigned long period, duty; u64 clk_rate, div; - int ret; clk_rate = clk_get_rate(pc->clk); @@ -114,74 +179,72 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * default prescaler value for all practical clock rate values. */ div = clk_rate * period_ns; - do_div(div, pc->data->prescaler * NSEC_PER_SEC); - period = div; + period = DIV_ROUND_CLOSEST_ULL(div, + pc->data->prescaler * NSEC_PER_SEC); div = clk_rate * duty_ns; - do_div(div, pc->data->prescaler * NSEC_PER_SEC); - duty = div; - - ret = clk_enable(pc->clk); - if (ret) - return ret; + duty = DIV_ROUND_CLOSEST_ULL(div, pc->data->prescaler * NSEC_PER_SEC); writel(period, pc->base + pc->data->regs.period); writel(duty, pc->base + pc->data->regs.duty); - writel(0, pc->base + pc->data->regs.cntr); - - clk_disable(pc->clk); - - return 0; -} - -static int rockchip_pwm_set_polarity(struct pwm_chip *chip, - struct pwm_device *pwm, - enum pwm_polarity polarity) -{ - /* - * No action needed here because pwm->polarity will be set by the core - * and the core will only change polarity when the PWM is not enabled. - * We'll handle things in set_enable(). - */ return 0; } -static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + struct pwm_state curstate; + bool enabled; int ret; + pwm_get_state(pwm, &curstate); + enabled = curstate.enabled; + ret = clk_enable(pc->clk); if (ret) return ret; - pc->data->set_enable(chip, pwm, true); + if (state->polarity != curstate.polarity && enabled) { + pc->data->set_enable(chip, pwm, false, state->polarity); + enabled = false; + } - return 0; -} + ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period); + if (ret) { + if (enabled != curstate.enabled) + pc->data->set_enable(chip, pwm, !enabled, + state->polarity); -static void rockchip_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + goto out; + } + + if (state->enabled != enabled) + pc->data->set_enable(chip, pwm, state->enabled, + state->polarity); - pc->data->set_enable(chip, pwm, false); + /* + * Update the state with the real hardware, which can differ a bit + * because of period/duty_cycle approximation. + */ + rockchip_pwm_get_state(chip, pwm, state); +out: clk_disable(pc->clk); + + return ret; } static const struct pwm_ops rockchip_pwm_ops_v1 = { - .config = rockchip_pwm_config, - .enable = rockchip_pwm_enable, - .disable = rockchip_pwm_disable, + .get_state = rockchip_pwm_get_state, + .apply = rockchip_pwm_apply, .owner = THIS_MODULE, }; static const struct pwm_ops rockchip_pwm_ops_v2 = { - .config = rockchip_pwm_config, - .set_polarity = rockchip_pwm_set_polarity, - .enable = rockchip_pwm_enable, - .disable = rockchip_pwm_disable, + .get_state = rockchip_pwm_get_state, + .apply = rockchip_pwm_apply, .owner = THIS_MODULE, }; @@ -195,6 +258,7 @@ static const struct rockchip_pwm_data pwm_data_v1 = { .prescaler = 2, .ops = &rockchip_pwm_ops_v1, .set_enable = rockchip_pwm_set_enable_v1, + .get_state = rockchip_pwm_get_state_v1, }; static const struct rockchip_pwm_data pwm_data_v2 = { @@ -205,8 +269,10 @@ static const struct rockchip_pwm_data pwm_data_v2 = { .ctrl = 0x0c, }, .prescaler = 1, + .supports_polarity = true, .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, + .get_state = rockchip_pwm_get_state_v2, }; static const struct rockchip_pwm_data pwm_data_vop = { @@ -217,8 +283,10 @@ static const struct rockchip_pwm_data pwm_data_vop = { .ctrl = 0x00, }, .prescaler = 1, + .supports_polarity = true, .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, + .get_state = rockchip_pwm_get_state_v2, }; static const struct of_device_id rockchip_pwm_dt_ids[] = { @@ -253,7 +321,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->clk)) return PTR_ERR(pc->clk); - ret = clk_prepare(pc->clk); + ret = clk_prepare_enable(pc->clk); if (ret) return ret; @@ -265,7 +333,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev) pc->chip.base = -1; pc->chip.npwm = 1; - if (pc->data->ops->set_polarity) { + if (pc->data->supports_polarity) { pc->chip.of_xlate = of_pwm_xlate_with_flags; pc->chip.of_pwm_n_cells = 3; } @@ -276,6 +344,10 @@ static int rockchip_pwm_probe(struct platform_device *pdev) dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); } + /* Keep the PWM clk enabled if the PWM appears to be up and running. */ + if (!pwm_is_enabled(pc->chip.pwms)) + clk_disable(pc->clk); + return ret; } @@ -283,6 +355,20 @@ static int rockchip_pwm_remove(struct platform_device *pdev) { struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev); + /* + * Disable the PWM clk before unpreparing it if the PWM device is still + * running. This should only happen when the last PWM user left it + * enabled, or when nobody requested a PWM that was previously enabled + * by the bootloader. + * + * FIXME: Maybe the core should disable all PWM devices in + * pwmchip_remove(). In this case we'd only have to call + * clk_unprepare() after pwmchip_remove(). + * + */ + if (pwm_is_enabled(pc->chip.pwms)) + clk_disable(pc->clk); + clk_unprepare(pc->clk); return pwmchip_remove(&pc->chip); diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c new file mode 100644 index 000000000000..e464582a390a --- /dev/null +++ b/drivers/pwm/pwm-stmpe.c @@ -0,0 +1,319 @@ +/* + * Copyright (C) 2016 Linaro Ltd. + * + * Author: Linus Walleij <linus.walleij@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/mfd/stmpe.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +#define STMPE24XX_PWMCS 0x30 +#define PWMCS_EN_PWM0 BIT(0) +#define PWMCS_EN_PWM1 BIT(1) +#define PWMCS_EN_PWM2 BIT(2) +#define STMPE24XX_PWMIC0 0x38 +#define STMPE24XX_PWMIC1 0x39 +#define STMPE24XX_PWMIC2 0x3a + +#define STMPE_PWM_24XX_PINBASE 21 + +struct stmpe_pwm { + struct stmpe *stmpe; + struct pwm_chip chip; + u8 last_duty; +}; + +static inline struct stmpe_pwm *to_stmpe_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct stmpe_pwm, chip); +} + +static int stmpe_24xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + u8 value; + int ret; + + ret = stmpe_reg_read(stmpe_pwm->stmpe, STMPE24XX_PWMCS); + if (ret < 0) { + dev_err(chip->dev, "error reading PWM#%u control\n", + pwm->hwpwm); + return ret; + } + + value = ret | BIT(pwm->hwpwm); + + ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value); + if (ret) { + dev_err(chip->dev, "error writing PWM#%u control\n", + pwm->hwpwm); + return ret; + } + + return 0; +} + +static void stmpe_24xx_pwm_disable(struct pwm_chip *chip, + struct pwm_device *pwm) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + u8 value; + int ret; + + ret = stmpe_reg_read(stmpe_pwm->stmpe, STMPE24XX_PWMCS); + if (ret < 0) { + dev_err(chip->dev, "error reading PWM#%u control\n", + pwm->hwpwm); + return; + } + + value = ret & ~BIT(pwm->hwpwm); + + ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value); + if (ret) { + dev_err(chip->dev, "error writing PWM#%u control\n", + pwm->hwpwm); + return; + } +} + +/* STMPE 24xx PWM instructions */ +#define SMAX 0x007f +#define SMIN 0x00ff +#define GTS 0x0000 +#define LOAD BIT(14) /* Only available on 2403 */ +#define RAMPUP 0x0000 +#define RAMPDOWN BIT(7) +#define PRESCALE_512 BIT(14) +#define STEPTIME_1 BIT(8) +#define BRANCH (BIT(15) | BIT(13)) + +static int stmpe_24xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + unsigned int i, pin; + u16 program[3] = { + SMAX, + GTS, + GTS, + }; + u8 offset; + int ret; + + /* Make sure we are disabled */ + if (pwm_is_enabled(pwm)) { + stmpe_24xx_pwm_disable(chip, pwm); + } else { + /* Connect the PWM to the pin */ + pin = pwm->hwpwm; + + /* On STMPE2401 and 2403 pins 21,22,23 are used */ + if (stmpe_pwm->stmpe->partnum == STMPE2401 || + stmpe_pwm->stmpe->partnum == STMPE2403) + pin += STMPE_PWM_24XX_PINBASE; + + ret = stmpe_set_altfunc(stmpe_pwm->stmpe, BIT(pin), + STMPE_BLOCK_PWM); + if (ret) { + dev_err(chip->dev, "unable to connect PWM#%u to pin\n", + pwm->hwpwm); + return ret; + } + } + + /* STMPE24XX */ + switch (pwm->hwpwm) { + case 0: + offset = STMPE24XX_PWMIC0; + break; + + case 1: + offset = STMPE24XX_PWMIC1; + break; + + case 2: + offset = STMPE24XX_PWMIC1; + break; + + default: + /* Should not happen as npwm is 3 */ + return -ENODEV; + } + + dev_dbg(chip->dev, "PWM#%u: config duty %d ns, period %d ns\n", + pwm->hwpwm, duty_ns, period_ns); + + if (duty_ns == 0) { + if (stmpe_pwm->stmpe->partnum == STMPE2401) + program[0] = SMAX; /* off all the time */ + + if (stmpe_pwm->stmpe->partnum == STMPE2403) + program[0] = LOAD | 0xff; /* LOAD 0xff */ + + stmpe_pwm->last_duty = 0x00; + } else if (duty_ns == period_ns) { + if (stmpe_pwm->stmpe->partnum == STMPE2401) + program[0] = SMIN; /* on all the time */ + + if (stmpe_pwm->stmpe->partnum == STMPE2403) + program[0] = LOAD | 0x00; /* LOAD 0x00 */ + + stmpe_pwm->last_duty = 0xff; + } else { + u8 value, last = stmpe_pwm->last_duty; + unsigned long duty; + + /* + * Counter goes from 0x00 to 0xff repeatedly at 32768 Hz, + * (means a period of 30517 ns) then this is compared to the + * counter from the ramp, if this is >= PWM counter the output + * is high. With LOAD we can define how much of the cycle it + * is on. + * + * Prescale = 0 -> 2 kHz -> T = 1/f = 488281.25 ns + */ + + /* Scale to 0..0xff */ + duty = duty_ns * 256; + duty = DIV_ROUND_CLOSEST(duty, period_ns); + value = duty; + + if (value == last) { + /* Run the old program */ + if (pwm_is_enabled(pwm)) + stmpe_24xx_pwm_enable(chip, pwm); + + return 0; + } else if (stmpe_pwm->stmpe->partnum == STMPE2403) { + /* STMPE2403 can simply set the right PWM value */ + program[0] = LOAD | value; + program[1] = 0x0000; + } else if (stmpe_pwm->stmpe->partnum == STMPE2401) { + /* STMPE2401 need a complex program */ + u16 incdec = 0x0000; + + if (last < value) + /* Count up */ + incdec = RAMPUP | (value - last); + else + /* Count down */ + incdec = RAMPDOWN | (last - value); + + /* Step to desired value, smoothly */ + program[0] = PRESCALE_512 | STEPTIME_1 | incdec; + + /* Loop eternally to 0x00 */ + program[1] = BRANCH; + } + + dev_dbg(chip->dev, + "PWM#%u: value = %02x, last_duty = %02x, program=%04x,%04x,%04x\n", + pwm->hwpwm, value, last, program[0], program[1], + program[2]); + stmpe_pwm->last_duty = value; + } + + /* + * We can write programs of up to 64 16-bit words into this channel. + */ + for (i = 0; i < ARRAY_SIZE(program); i++) { + u8 value; + + value = (program[i] >> 8) & 0xff; + + ret = stmpe_reg_write(stmpe_pwm->stmpe, offset, value); + if (ret) { + dev_err(chip->dev, "error writing register %02x: %d\n", + offset, ret); + return ret; + } + + value = program[i] & 0xff; + + ret = stmpe_reg_write(stmpe_pwm->stmpe, offset, value); + if (ret) { + dev_err(chip->dev, "error writing register %02x: %d\n", + offset, ret); + return ret; + } + } + + /* If we were enabled, re-enable this PWM */ + if (pwm_is_enabled(pwm)) + stmpe_24xx_pwm_enable(chip, pwm); + + /* Sleep for 200ms so we're sure it will take effect */ + msleep(200); + + dev_dbg(chip->dev, "programmed PWM#%u, %u bytes\n", pwm->hwpwm, i); + + return 0; +} + +static const struct pwm_ops stmpe_24xx_pwm_ops = { + .config = stmpe_24xx_pwm_config, + .enable = stmpe_24xx_pwm_enable, + .disable = stmpe_24xx_pwm_disable, + .owner = THIS_MODULE, +}; + +static int __init stmpe_pwm_probe(struct platform_device *pdev) +{ + struct stmpe *stmpe = dev_get_drvdata(pdev->dev.parent); + struct stmpe_pwm *pwm; + int ret; + + pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL); + if (!pwm) + return -ENOMEM; + + pwm->stmpe = stmpe; + pwm->chip.dev = &pdev->dev; + pwm->chip.base = -1; + + if (stmpe->partnum == STMPE2401 || stmpe->partnum == STMPE2403) { + pwm->chip.ops = &stmpe_24xx_pwm_ops; + pwm->chip.npwm = 3; + } else { + if (stmpe->partnum == STMPE1601) + dev_err(&pdev->dev, "STMPE1601 not yet supported\n"); + else + dev_err(&pdev->dev, "Unknown STMPE PWM\n"); + + return -ENODEV; + } + + ret = stmpe_enable(stmpe, STMPE_BLOCK_PWM); + if (ret) + return ret; + + ret = pwmchip_add(&pwm->chip); + if (ret) { + stmpe_disable(stmpe, STMPE_BLOCK_PWM); + return ret; + } + + platform_set_drvdata(pdev, pwm); + + return 0; +} + +static struct platform_driver stmpe_pwm_driver = { + .driver = { + .name = "stmpe-pwm", + }, +}; +builtin_platform_driver_probe(stmpe_pwm_driver, stmpe_pwm_probe); diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index d4de0607b502..e4647840cd6e 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -26,9 +26,11 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/pwm.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/reset.h> #define PWM_ENABLE (1 << 31) #define PWM_DUTY_WIDTH 8 @@ -36,15 +38,20 @@ #define PWM_SCALE_WIDTH 13 #define PWM_SCALE_SHIFT 0 -#define NUM_PWM 4 +struct tegra_pwm_soc { + unsigned int num_channels; +}; struct tegra_pwm_chip { - struct pwm_chip chip; - struct device *dev; + struct pwm_chip chip; + struct device *dev; + + struct clk *clk; + struct reset_control*rst; - struct clk *clk; + void __iomem *regs; - void __iomem *mmio_base; + const struct tegra_pwm_soc *soc; }; static inline struct tegra_pwm_chip *to_tegra_pwm_chip(struct pwm_chip *chip) @@ -54,20 +61,20 @@ static inline struct tegra_pwm_chip *to_tegra_pwm_chip(struct pwm_chip *chip) static inline u32 pwm_readl(struct tegra_pwm_chip *chip, unsigned int num) { - return readl(chip->mmio_base + (num << 4)); + return readl(chip->regs + (num << 4)); } static inline void pwm_writel(struct tegra_pwm_chip *chip, unsigned int num, unsigned long val) { - writel(val, chip->mmio_base + (num << 4)); + writel(val, chip->regs + (num << 4)); } static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct tegra_pwm_chip *pc = to_tegra_pwm_chip(chip); - unsigned long long c; + unsigned long long c = duty_ns; unsigned long rate, hz; u32 val = 0; int err; @@ -77,7 +84,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * per (1 << PWM_DUTY_WIDTH) cycles and make sure to round to the * nearest integer during division. */ - c = duty_ns * ((1 << PWM_DUTY_WIDTH) - 1) + period_ns / 2; + c *= (1 << PWM_DUTY_WIDTH); + c += period_ns / 2; do_div(c, period_ns); val = (u32)c << PWM_DUTY_SHIFT; @@ -176,12 +184,13 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (!pwm) return -ENOMEM; + pwm->soc = of_device_get_match_data(&pdev->dev); pwm->dev = &pdev->dev; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pwm->mmio_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(pwm->mmio_base)) - return PTR_ERR(pwm->mmio_base); + pwm->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(pwm->regs)) + return PTR_ERR(pwm->regs); platform_set_drvdata(pdev, pwm); @@ -189,14 +198,24 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); + pwm->rst = devm_reset_control_get(&pdev->dev, "pwm"); + if (IS_ERR(pwm->rst)) { + ret = PTR_ERR(pwm->rst); + dev_err(&pdev->dev, "Reset control is not found: %d\n", ret); + return ret; + } + + reset_control_deassert(pwm->rst); + pwm->chip.dev = &pdev->dev; pwm->chip.ops = &tegra_pwm_ops; pwm->chip.base = -1; - pwm->chip.npwm = NUM_PWM; + pwm->chip.npwm = pwm->soc->num_channels; ret = pwmchip_add(&pwm->chip); if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); + reset_control_assert(pwm->rst); return ret; } @@ -206,12 +225,17 @@ static int tegra_pwm_probe(struct platform_device *pdev) static int tegra_pwm_remove(struct platform_device *pdev) { struct tegra_pwm_chip *pc = platform_get_drvdata(pdev); - int i; + unsigned int i; + int err; if (WARN_ON(!pc)) return -ENODEV; - for (i = 0; i < NUM_PWM; i++) { + err = clk_prepare_enable(pc->clk); + if (err < 0) + return err; + + for (i = 0; i < pc->chip.npwm; i++) { struct pwm_device *pwm = &pc->chip.pwms[i]; if (!pwm_is_enabled(pwm)) @@ -223,12 +247,23 @@ static int tegra_pwm_remove(struct platform_device *pdev) clk_disable_unprepare(pc->clk); } + reset_control_assert(pc->rst); + clk_disable_unprepare(pc->clk); + return pwmchip_remove(&pc->chip); } +static const struct tegra_pwm_soc tegra20_pwm_soc = { + .num_channels = 4, +}; + +static const struct tegra_pwm_soc tegra186_pwm_soc = { + .num_channels = 1, +}; + static const struct of_device_id tegra_pwm_of_match[] = { - { .compatible = "nvidia,tegra20-pwm" }, - { .compatible = "nvidia,tegra30-pwm" }, + { .compatible = "nvidia,tegra20-pwm", .data = &tegra20_pwm_soc }, + { .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc }, { } }; diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 616af764a276..6ec342dd3eea 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -27,8 +27,6 @@ #include <linux/pwm.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - /* ECAP registers and bits definitions */ #define CAP1 0x08 #define CAP2 0x0C @@ -195,6 +193,7 @@ static const struct pwm_ops ecap_pwm_ops = { }; static const struct of_device_id ecap_of_match[] = { + { .compatible = "ti,am3352-ecap" }, { .compatible = "ti,am33xx-ecap" }, {}, }; @@ -202,11 +201,11 @@ MODULE_DEVICE_TABLE(of, ecap_of_match); static int ecap_pwm_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; int ret; struct resource *r; struct clk *clk; struct ecap_pwm_chip *pc; - u16 status; pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -214,6 +213,13 @@ static int ecap_pwm_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, "fck"); if (IS_ERR(clk)) { + if (of_device_is_compatible(np, "ti,am33xx-ecap")) { + dev_warn(&pdev->dev, "Binding is obsolete.\n"); + clk = devm_clk_get(pdev->dev.parent, "fck"); + } + } + + if (IS_ERR(clk)) { dev_err(&pdev->dev, "failed to get clock\n"); return PTR_ERR(clk); } @@ -243,40 +249,15 @@ static int ecap_pwm_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - - status = pwmss_submodule_state_change(pdev->dev.parent, - PWMSS_ECAPCLK_EN); - if (!(status & PWMSS_ECAPCLK_EN_ACK)) { - dev_err(&pdev->dev, "PWMSS config space clock enable failed\n"); - ret = -EINVAL; - goto pwmss_clk_failure; - } - - pm_runtime_put_sync(&pdev->dev); platform_set_drvdata(pdev, pc); return 0; - -pwmss_clk_failure: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pwmchip_remove(&pc->chip); - return ret; } static int ecap_pwm_remove(struct platform_device *pdev) { struct ecap_pwm_chip *pc = platform_get_drvdata(pdev); - pm_runtime_get_sync(&pdev->dev); - /* - * Due to hardware misbehaviour, acknowledge of the stop_req - * is missing. Hence checking of the status bit skipped. - */ - pwmss_submodule_state_change(pdev->dev.parent, PWMSS_ECAPCLK_STOP_REQ); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); return pwmchip_remove(&pc->chip); } diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 6a41e66015b6..b5c6b0636893 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -27,8 +27,6 @@ #include <linux/pm_runtime.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - /* EHRPWM registers and bits definitions */ /* Time base module registers */ @@ -426,6 +424,7 @@ static const struct pwm_ops ehrpwm_pwm_ops = { }; static const struct of_device_id ehrpwm_of_match[] = { + { .compatible = "ti,am3352-ehrpwm" }, { .compatible = "ti,am33xx-ehrpwm" }, {}, }; @@ -433,11 +432,11 @@ MODULE_DEVICE_TABLE(of, ehrpwm_of_match); static int ehrpwm_pwm_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; int ret; struct resource *r; struct clk *clk; struct ehrpwm_pwm_chip *pc; - u16 status; pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -445,6 +444,13 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, "fck"); if (IS_ERR(clk)) { + if (of_device_is_compatible(np, "ti,am33xx-ecap")) { + dev_warn(&pdev->dev, "Binding is obsolete.\n"); + clk = devm_clk_get(pdev->dev.parent, "fck"); + } + } + + if (IS_ERR(clk)) { dev_err(&pdev->dev, "failed to get clock\n"); return PTR_ERR(clk); } @@ -487,27 +493,9 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - - status = pwmss_submodule_state_change(pdev->dev.parent, - PWMSS_EPWMCLK_EN); - if (!(status & PWMSS_EPWMCLK_EN_ACK)) { - dev_err(&pdev->dev, "PWMSS config space clock enable failed\n"); - ret = -EINVAL; - goto pwmss_clk_failure; - } - - pm_runtime_put_sync(&pdev->dev); platform_set_drvdata(pdev, pc); return 0; - -pwmss_clk_failure: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pwmchip_remove(&pc->chip); - clk_unprepare(pc->tbclk); - return ret; } static int ehrpwm_pwm_remove(struct platform_device *pdev) @@ -516,14 +504,6 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev) clk_unprepare(pc->tbclk); - pm_runtime_get_sync(&pdev->dev); - /* - * Due to hardware misbehaviour, acknowledge of the stop_req - * is missing. Hence checking of the status bit skipped. - */ - pwmss_submodule_state_change(pdev->dev.parent, PWMSS_EPWMCLK_STOP_REQ); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); return pwmchip_remove(&pc->chip); diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c index 5cf65a15d021..829f4991c96f 100644 --- a/drivers/pwm/pwm-tipwmss.c +++ b/drivers/pwm/pwm-tipwmss.c @@ -22,32 +22,6 @@ #include <linux/pm_runtime.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - -#define PWMSS_CLKCONFIG 0x8 /* Clock gating reg */ -#define PWMSS_CLKSTATUS 0xc /* Clock gating status reg */ - -struct pwmss_info { - void __iomem *mmio_base; - struct mutex pwmss_lock; - u16 pwmss_clkconfig; -}; - -u16 pwmss_submodule_state_change(struct device *dev, int set) -{ - struct pwmss_info *info = dev_get_drvdata(dev); - u16 val; - - mutex_lock(&info->pwmss_lock); - val = readw(info->mmio_base + PWMSS_CLKCONFIG); - val |= set; - writew(val , info->mmio_base + PWMSS_CLKCONFIG); - mutex_unlock(&info->pwmss_lock); - - return readw(info->mmio_base + PWMSS_CLKSTATUS); -} -EXPORT_SYMBOL(pwmss_submodule_state_change); - static const struct of_device_id pwmss_of_match[] = { { .compatible = "ti,am33xx-pwmss" }, {}, @@ -57,24 +31,10 @@ MODULE_DEVICE_TABLE(of, pwmss_of_match); static int pwmss_probe(struct platform_device *pdev) { int ret; - struct resource *r; - struct pwmss_info *info; struct device_node *node = pdev->dev.of_node; - info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - mutex_init(&info->pwmss_lock); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - info->mmio_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(info->mmio_base)) - return PTR_ERR(info->mmio_base); - pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); - platform_set_drvdata(pdev, info); /* Populate all the child nodes here... */ ret = of_platform_populate(node, NULL, NULL, &pdev->dev); @@ -86,30 +46,21 @@ static int pwmss_probe(struct platform_device *pdev) static int pwmss_remove(struct platform_device *pdev) { - struct pwmss_info *info = platform_get_drvdata(pdev); - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - mutex_destroy(&info->pwmss_lock); return 0; } #ifdef CONFIG_PM_SLEEP static int pwmss_suspend(struct device *dev) { - struct pwmss_info *info = dev_get_drvdata(dev); - - info->pwmss_clkconfig = readw(info->mmio_base + PWMSS_CLKCONFIG); pm_runtime_put_sync(dev); return 0; } static int pwmss_resume(struct device *dev) { - struct pwmss_info *info = dev_get_drvdata(dev); - pm_runtime_get_sync(dev); - writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG); return 0; } #endif diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h deleted file mode 100644 index 10ad8040408b..000000000000 --- a/drivers/pwm/pwm-tipwmss.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * TI PWM Subsystem driver - * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __TIPWMSS_H -#define __TIPWMSS_H - -/* PWM substem clock gating */ -#define PWMSS_ECAPCLK_EN BIT(0) -#define PWMSS_ECAPCLK_STOP_REQ BIT(1) -#define PWMSS_EPWMCLK_EN BIT(8) -#define PWMSS_EPWMCLK_STOP_REQ BIT(9) - -#define PWMSS_ECAPCLK_EN_ACK BIT(0) -#define PWMSS_EPWMCLK_EN_ACK BIT(8) - -#ifdef CONFIG_PWM_TIPWMSS -extern u16 pwmss_submodule_state_change(struct device *dev, int set); -#else -static inline u16 pwmss_submodule_state_change(struct device *dev, int set) -{ - /* return success status value */ - return 0xFFFF; -} -#endif -#endif /* __TIPWMSS_H */ diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 01695d48dd54..18ed725594c3 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -208,16 +208,33 @@ static ssize_t polarity_store(struct device *child, return ret ? : size; } +static ssize_t capture_show(struct device *child, + struct device_attribute *attr, + char *buf) +{ + struct pwm_device *pwm = child_to_pwm_device(child); + struct pwm_capture result; + int ret; + + ret = pwm_capture(pwm, &result, jiffies_to_msecs(HZ)); + if (ret) + return ret; + + return sprintf(buf, "%u %u\n", result.period, result.duty_cycle); +} + static DEVICE_ATTR_RW(period); static DEVICE_ATTR_RW(duty_cycle); static DEVICE_ATTR_RW(enable); static DEVICE_ATTR_RW(polarity); +static DEVICE_ATTR_RO(capture); static struct attribute *pwm_attrs[] = { &dev_attr_period.attr, &dev_attr_duty_cycle.attr, &dev_attr_enable.attr, &dev_attr_polarity.attr, + &dev_attr_capture.attr, NULL }; ATTRIBUTE_GROUPS(pwm); diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index b5a10d3c92c7..d6d2f20c4597 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -67,6 +67,15 @@ config RAPIDIO_ENUM_BASIC endchoice +config RAPIDIO_CHMAN + tristate "RapidIO Channelized Messaging driver" + depends on RAPIDIO + help + This option includes RapidIO channelized messaging driver which + provides socket-like interface to allow sharing of single RapidIO + messaging mailbox between multiple user-space applications. + See "Documentation/rapidio/rio_cm.txt" for driver description. + config RAPIDIO_MPORT_CDEV tristate "RapidIO /dev mport device driver" depends on RAPIDIO diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile index 6271ada6993f..74dcea45ad49 100644 --- a/drivers/rapidio/Makefile +++ b/drivers/rapidio/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_RAPIDIO) += rapidio.o rapidio-y := rio.o rio-access.o rio-driver.o rio-sysfs.o obj-$(CONFIG_RAPIDIO_ENUM_BASIC) += rio-scan.o +obj-$(CONFIG_RAPIDIO_CHMAN) += rio_cm.o obj-$(CONFIG_RAPIDIO) += switches/ obj-$(CONFIG_RAPIDIO) += devices/ diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index e165b7ce29d7..436dfe871d32 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1813,7 +1813,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, if (rdev->pef & RIO_PEF_EXT_FEATURES) { rdev->efptr = rval & 0xffff; rdev->phys_efptr = rio_mport_get_physefb(mport, 0, destid, - hopcount); + hopcount, &rdev->phys_rmap); rdev->em_efptr = rio_mport_get_feature(mport, 0, destid, hopcount, RIO_EFB_ERR_MGMNT); @@ -2242,7 +2242,7 @@ static void mport_mm_open(struct vm_area_struct *vma) { struct rio_mport_mapping *map = vma->vm_private_data; -rmcd_debug(MMAP, "0x%pad", &map->phys_addr); + rmcd_debug(MMAP, "%pad", &map->phys_addr); kref_get(&map->ref); } @@ -2250,7 +2250,7 @@ static void mport_mm_close(struct vm_area_struct *vma) { struct rio_mport_mapping *map = vma->vm_private_data; -rmcd_debug(MMAP, "0x%pad", &map->phys_addr); + rmcd_debug(MMAP, "%pad", &map->phys_addr); mutex_lock(&map->md->buf_mutex); kref_put(&map->ref, mport_release_mapping); mutex_unlock(&map->md->buf_mutex); diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index b5b455614f8a..32f0f014a067 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -37,11 +37,20 @@ #include "tsi721.h" #ifdef DEBUG -u32 dbg_level = DBG_INIT | DBG_EXIT; +u32 dbg_level; module_param(dbg_level, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); #endif +static int pcie_mrrs = -1; +module_param(pcie_mrrs, int, S_IRUGO); +MODULE_PARM_DESC(pcie_mrrs, "PCIe MRRS override value (0...5)"); + +static u8 mbox_sel = 0x0f; +module_param(mbox_sel, byte, S_IRUGO); +MODULE_PARM_DESC(mbox_sel, + "RIO Messaging MBOX Selection Mask (default: 0x0f = all)"); + static void tsi721_omsg_handler(struct tsi721_device *priv, int ch); static void tsi721_imsg_handler(struct tsi721_device *priv, int ch); @@ -1081,7 +1090,7 @@ static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv) * from rstart to lstart. */ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u32 size, u32 flags) + u64 rstart, u64 size, u32 flags) { struct tsi721_device *priv = mport->priv; int i, avail = -1; @@ -1094,6 +1103,10 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, struct tsi721_ib_win_mapping *map = NULL; int ret = -EBUSY; + /* Max IBW size supported by HW is 16GB */ + if (size > 0x400000000UL) + return -EINVAL; + if (direct) { /* Calculate minimal acceptable window size and base address */ @@ -1101,15 +1114,15 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, ibw_start = lstart & ~(ibw_size - 1); tsi_debug(IBW, &priv->pdev->dev, - "Direct (RIO_0x%llx -> PCIe_0x%pad), size=0x%x, ibw_start = 0x%llx", + "Direct (RIO_0x%llx -> PCIe_%pad), size=0x%llx, ibw_start = 0x%llx", rstart, &lstart, size, ibw_start); while ((lstart + size) > (ibw_start + ibw_size)) { ibw_size *= 2; ibw_start = lstart & ~(ibw_size - 1); - if (ibw_size > 0x80000000) { /* Limit max size to 2GB */ + /* Check for crossing IBW max size 16GB */ + if (ibw_size > 0x400000000UL) return -EBUSY; - } } loc_start = ibw_start; @@ -1120,7 +1133,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, } else { tsi_debug(IBW, &priv->pdev->dev, - "Translated (RIO_0x%llx -> PCIe_0x%pad), size=0x%x", + "Translated (RIO_0x%llx -> PCIe_%pad), size=0x%llx", rstart, &lstart, size); if (!is_power_of_2(size) || size < 0x1000 || @@ -1215,7 +1228,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, priv->ibwin_cnt--; tsi_debug(IBW, &priv->pdev->dev, - "Configured IBWIN%d (RIO_0x%llx -> PCIe_0x%pad), size=0x%llx", + "Configured IBWIN%d (RIO_0x%llx -> PCIe_%pad), size=0x%llx", i, ibw_start, &loc_start, ibw_size); return 0; @@ -1237,7 +1250,7 @@ static void tsi721_rio_unmap_inb_mem(struct rio_mport *mport, int i; tsi_debug(IBW, &priv->pdev->dev, - "Unmap IBW mapped to PCIe_0x%pad", &lstart); + "Unmap IBW mapped to PCIe_%pad", &lstart); /* Search for matching active inbound translation window */ for (i = 0; i < TSI721_IBWIN_NUM; i++) { @@ -1877,6 +1890,11 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id, goto out; } + if ((mbox_sel & (1 << mbox)) == 0) { + rc = -ENODEV; + goto out; + } + priv->omsg_ring[mbox].dev_id = dev_id; priv->omsg_ring[mbox].size = entries; priv->omsg_ring[mbox].sts_rdptr = 0; @@ -2161,6 +2179,11 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id, goto out; } + if ((mbox_sel & (1 << mbox)) == 0) { + rc = -ENODEV; + goto out; + } + /* Initialize IB Messaging Ring */ priv->imsg_ring[mbox].dev_id = dev_id; priv->imsg_ring[mbox].size = entries; @@ -2532,11 +2555,11 @@ static int tsi721_query_mport(struct rio_mport *mport, struct tsi721_device *priv = mport->priv; u32 rval; - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_ERR_STS_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_ERR_STS_CSR(0, 0)); if (rval & RIO_PORT_N_ERR_STS_PORT_OK) { - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL2_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_CTL2_CSR(0, 0)); attr->link_speed = (rval & RIO_PORT_N_CTL2_SEL_BAUD) >> 28; - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_CTL_CSR(0, 0)); attr->link_width = (rval & RIO_PORT_N_CTL_IPW) >> 27; } else attr->link_speed = RIO_LINK_DOWN; @@ -2650,9 +2673,9 @@ static int tsi721_setup_mport(struct tsi721_device *priv) mport->ops = &tsi721_rio_ops; mport->index = 0; mport->sys_size = 0; /* small system */ - mport->phy_type = RIO_PHY_SERIAL; mport->priv = (void *)priv; mport->phys_efptr = 0x100; + mport->phys_rmap = 1; mport->dev.parent = &pdev->dev; mport->dev.release = tsi721_mport_release; @@ -2840,6 +2863,16 @@ static int tsi721_probe(struct pci_dev *pdev, pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN, 0); + /* Override PCIe Maximum Read Request Size setting if requested */ + if (pcie_mrrs >= 0) { + if (pcie_mrrs <= 5) + pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_READRQ, pcie_mrrs << 12); + else + tsi_info(&pdev->dev, + "Invalid MRRS override value %d", pcie_mrrs); + } + /* Adjust PCIe completion timeout. */ pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL2, 0xf, 0x2); diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index 5456dbddc929..5941437cbdd1 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -661,7 +661,7 @@ enum dma_rtype { */ #define TSI721_DMA_CHNUM TSI721_DMA_MAXCH -#define TSI721_DMACH_MAINT 0 /* DMA channel for maint requests */ +#define TSI721_DMACH_MAINT 7 /* DMA channel for maint requests */ #define TSI721_DMACH_MAINT_NBD 32 /* Number of BDs for maint requests */ #define TSI721_DMACH_DMA 1 /* DMA channel for data transfers */ diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index 155cae1e62de..e2a418598129 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -36,18 +36,26 @@ #include "tsi721.h" -#define TSI721_DMA_TX_QUEUE_SZ 16 /* number of transaction descriptors */ - #ifdef CONFIG_PCI_MSI static irqreturn_t tsi721_bdma_msix(int irq, void *ptr); #endif static int tsi721_submit_sg(struct tsi721_tx_desc *desc); static unsigned int dma_desc_per_channel = 128; -module_param(dma_desc_per_channel, uint, S_IWUSR | S_IRUGO); +module_param(dma_desc_per_channel, uint, S_IRUGO); MODULE_PARM_DESC(dma_desc_per_channel, "Number of DMA descriptors per channel (default: 128)"); +static unsigned int dma_txqueue_sz = 16; +module_param(dma_txqueue_sz, uint, S_IRUGO); +MODULE_PARM_DESC(dma_txqueue_sz, + "DMA Transactions Queue Size (default: 16)"); + +static u8 dma_sel = 0x7f; +module_param(dma_sel, byte, S_IRUGO); +MODULE_PARM_DESC(dma_sel, + "DMA Channel Selection Mask (default: 0x7f = all)"); + static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan) { return container_of(chan, struct tsi721_bdma_chan, dchan); @@ -718,6 +726,7 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd) cookie = dma_cookie_assign(txd); desc->status = DMA_IN_PROGRESS; list_add_tail(&desc->desc_node, &bdma_chan->queue); + tsi721_advance_work(bdma_chan, NULL); spin_unlock_bh(&bdma_chan->lock); return cookie; @@ -732,7 +741,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id); if (bdma_chan->bd_base) - return TSI721_DMA_TX_QUEUE_SZ; + return dma_txqueue_sz; /* Initialize BDMA channel */ if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) { @@ -742,7 +751,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) } /* Allocate queue of transaction descriptors */ - desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc), + desc = kcalloc(dma_txqueue_sz, sizeof(struct tsi721_tx_desc), GFP_ATOMIC); if (!desc) { tsi_err(&dchan->dev->device, @@ -754,7 +763,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) bdma_chan->tx_desc = desc; - for (i = 0; i < TSI721_DMA_TX_QUEUE_SZ; i++) { + for (i = 0; i < dma_txqueue_sz; i++) { dma_async_tx_descriptor_init(&desc[i].txd, dchan); desc[i].txd.tx_submit = tsi721_tx_submit; desc[i].txd.flags = DMA_CTRL_ACK; @@ -766,7 +775,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) bdma_chan->active = true; tsi721_bdma_interrupt_enable(bdma_chan, 1); - return TSI721_DMA_TX_QUEUE_SZ; + return dma_txqueue_sz; } static void tsi721_sync_dma_irq(struct tsi721_bdma_chan *bdma_chan) @@ -962,7 +971,7 @@ void tsi721_dma_stop_all(struct tsi721_device *priv) int i; for (i = 0; i < TSI721_DMA_MAXCH; i++) { - if (i != TSI721_DMACH_MAINT) + if ((i != TSI721_DMACH_MAINT) && (dma_sel & (1 << i))) tsi721_dma_stop(&priv->bdma[i]); } } @@ -979,7 +988,7 @@ int tsi721_register_dma(struct tsi721_device *priv) for (i = 0; i < TSI721_DMA_MAXCH; i++) { struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i]; - if (i == TSI721_DMACH_MAINT) + if ((i == TSI721_DMACH_MAINT) || (dma_sel & (1 << i)) == 0) continue; bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i); diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index a63a380809d1..23429bdaca84 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -49,15 +49,6 @@ struct rio_id_table { static int next_destid = 0; static int next_comptag = 1; -static int rio_mport_phys_table[] = { - RIO_EFB_PAR_EP_ID, - RIO_EFB_PAR_EP_REC_ID, - RIO_EFB_SER_EP_ID, - RIO_EFB_SER_EP_REC_ID, - -1, -}; - - /** * rio_destid_alloc - Allocate next available destID for given network * @net: RIO network @@ -380,10 +371,15 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, if (rdev->pef & RIO_PEF_EXT_FEATURES) { rdev->efptr = result & 0xffff; rdev->phys_efptr = rio_mport_get_physefb(port, 0, destid, - hopcount); + hopcount, &rdev->phys_rmap); + pr_debug("RIO: %s Register Map %d device\n", + __func__, rdev->phys_rmap); rdev->em_efptr = rio_mport_get_feature(port, 0, destid, hopcount, RIO_EFB_ERR_MGMNT); + if (!rdev->em_efptr) + rdev->em_efptr = rio_mport_get_feature(port, 0, destid, + hopcount, RIO_EFB_ERR_MGMNT_HS); } rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR, @@ -445,7 +441,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0); } else { if (do_enum) - /*Enable Input Output Port (transmitter reviever)*/ + /*Enable Input Output Port (transmitter receiver)*/ rio_enable_rx_tx_port(port, 0, destid, hopcount, 0); dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id, @@ -481,10 +477,8 @@ cleanup: /** * rio_sport_is_active- Tests if a switch port has an active connection. - * @port: Master port to send transaction - * @destid: Associated destination ID for switch - * @hopcount: Hopcount to reach switch - * @sport: Switch port number + * @rdev: RapidIO device object + * @sp: Switch port number * * Reads the port error status CSR for a particular switch port to * determine if the port has an active link. Returns @@ -492,31 +486,12 @@ cleanup: * inactive. */ static int -rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport) +rio_sport_is_active(struct rio_dev *rdev, int sp) { u32 result = 0; - u32 ext_ftr_ptr; - ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount, 0); - - while (ext_ftr_ptr) { - rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr, &result); - result = RIO_GET_BLOCK_ID(result); - if ((result == RIO_EFB_SER_EP_FREE_ID) || - (result == RIO_EFB_SER_EP_FREE_ID_V13P) || - (result == RIO_EFB_SER_EP_FREC_ID)) - break; - - ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount, - ext_ftr_ptr); - } - - if (ext_ftr_ptr) - rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr + - RIO_PORT_N_ERR_STS_CSR(sport), - &result); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, sp), + &result); return result & RIO_PORT_N_ERR_STS_PORT_OK; } @@ -655,9 +630,7 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, cur_destid = next_destid; - if (rio_sport_is_active - (port, RIO_ANY_DESTID(port->sys_size), hopcount, - port_num)) { + if (rio_sport_is_active(rdev, port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); @@ -785,8 +758,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; - if (rio_sport_is_active - (port, destid, hopcount, port_num)) { + if (rio_sport_is_active(rdev, port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); @@ -831,21 +803,11 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, static int rio_mport_is_active(struct rio_mport *port) { u32 result = 0; - u32 ext_ftr_ptr; - int *entry = rio_mport_phys_table; - - do { - if ((ext_ftr_ptr = - rio_mport_get_feature(port, 1, 0, 0, *entry))) - break; - } while (*++entry >= 0); - - if (ext_ftr_ptr) - rio_local_read_config_32(port, - ext_ftr_ptr + - RIO_PORT_N_ERR_STS_CSR(port->index), - &result); + rio_local_read_config_32(port, + port->phys_efptr + + RIO_PORT_N_ERR_STS_CSR(port->index, port->phys_rmap), + &result); return result & RIO_PORT_N_ERR_STS_PORT_OK; } diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 0dcaa660cba1..37042858c2db 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -268,6 +268,12 @@ int rio_request_inb_mbox(struct rio_mport *mport, mport->inb_msg[mbox].mcback = minb; rc = mport->ops->open_inb_mbox(mport, dev_id, mbox, entries); + if (rc) { + mport->inb_msg[mbox].mcback = NULL; + mport->inb_msg[mbox].res = NULL; + release_resource(res); + kfree(res); + } } else rc = -ENOMEM; @@ -285,13 +291,22 @@ int rio_request_inb_mbox(struct rio_mport *mport, */ int rio_release_inb_mbox(struct rio_mport *mport, int mbox) { - if (mport->ops->close_inb_mbox) { - mport->ops->close_inb_mbox(mport, mbox); + int rc; - /* Release the mailbox resource */ - return release_resource(mport->inb_msg[mbox].res); - } else - return -ENOSYS; + if (!mport->ops->close_inb_mbox || !mport->inb_msg[mbox].res) + return -EINVAL; + + mport->ops->close_inb_mbox(mport, mbox); + mport->inb_msg[mbox].mcback = NULL; + + rc = release_resource(mport->inb_msg[mbox].res); + if (rc) + return rc; + + kfree(mport->inb_msg[mbox].res); + mport->inb_msg[mbox].res = NULL; + + return 0; } /** @@ -336,6 +351,12 @@ int rio_request_outb_mbox(struct rio_mport *mport, mport->outb_msg[mbox].mcback = moutb; rc = mport->ops->open_outb_mbox(mport, dev_id, mbox, entries); + if (rc) { + mport->outb_msg[mbox].mcback = NULL; + mport->outb_msg[mbox].res = NULL; + release_resource(res); + kfree(res); + } } else rc = -ENOMEM; @@ -353,13 +374,22 @@ int rio_request_outb_mbox(struct rio_mport *mport, */ int rio_release_outb_mbox(struct rio_mport *mport, int mbox) { - if (mport->ops->close_outb_mbox) { - mport->ops->close_outb_mbox(mport, mbox); + int rc; - /* Release the mailbox resource */ - return release_resource(mport->outb_msg[mbox].res); - } else - return -ENOSYS; + if (!mport->ops->close_outb_mbox || !mport->outb_msg[mbox].res) + return -EINVAL; + + mport->ops->close_outb_mbox(mport, mbox); + mport->outb_msg[mbox].mcback = NULL; + + rc = release_resource(mport->outb_msg[mbox].res); + if (rc) + return rc; + + kfree(mport->outb_msg[mbox].res); + mport->outb_msg[mbox].res = NULL; + + return 0; } /** @@ -756,10 +786,11 @@ EXPORT_SYMBOL_GPL(rio_unmap_outb_region); * @local: Indicate a local master port or remote device access * @destid: Destination ID of the device * @hopcount: Number of switch hops to the device + * @rmap: pointer to location to store register map type info */ u32 rio_mport_get_physefb(struct rio_mport *port, int local, - u16 destid, u8 hopcount) + u16 destid, u8 hopcount, u32 *rmap) { u32 ext_ftr_ptr; u32 ftr_header; @@ -777,14 +808,21 @@ rio_mport_get_physefb(struct rio_mport *port, int local, ftr_header = RIO_GET_BLOCK_ID(ftr_header); switch (ftr_header) { - case RIO_EFB_SER_EP_ID_V13P: - case RIO_EFB_SER_EP_REC_ID_V13P: - case RIO_EFB_SER_EP_FREE_ID_V13P: case RIO_EFB_SER_EP_ID: case RIO_EFB_SER_EP_REC_ID: case RIO_EFB_SER_EP_FREE_ID: - case RIO_EFB_SER_EP_FREC_ID: + case RIO_EFB_SER_EP_M1_ID: + case RIO_EFB_SER_EP_SW_M1_ID: + case RIO_EFB_SER_EPF_M1_ID: + case RIO_EFB_SER_EPF_SW_M1_ID: + *rmap = 1; + return ext_ftr_ptr; + case RIO_EFB_SER_EP_M2_ID: + case RIO_EFB_SER_EP_SW_M2_ID: + case RIO_EFB_SER_EPF_M2_ID: + case RIO_EFB_SER_EPF_SW_M2_ID: + *rmap = 2; return ext_ftr_ptr; default: @@ -843,16 +881,16 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) u32 regval; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum), - ®val); + RIO_DEV_PORT_N_CTL_CSR(rdev, pnum), + ®val); if (lock) regval |= RIO_PORT_N_CTL_LOCKOUT; else regval &= ~RIO_PORT_N_CTL_LOCKOUT; rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum), - regval); + RIO_DEV_PORT_N_CTL_CSR(rdev, pnum), + regval); return 0; } EXPORT_SYMBOL_GPL(rio_set_port_lockout); @@ -876,6 +914,7 @@ int rio_enable_rx_tx_port(struct rio_mport *port, #ifdef CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS u32 regval; u32 ext_ftr_ptr; + u32 rmap; /* * enable rx input tx output port @@ -883,34 +922,29 @@ int rio_enable_rx_tx_port(struct rio_mport *port, pr_debug("rio_enable_rx_tx_port(local = %d, destid = %d, hopcount = " "%d, port_num = %d)\n", local, destid, hopcount, port_num); - ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, hopcount); + ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, + hopcount, &rmap); if (local) { - rio_local_read_config_32(port, ext_ftr_ptr + - RIO_PORT_N_CTL_CSR(0), + rio_local_read_config_32(port, + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(0, rmap), ®val); } else { if (rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), ®val) < 0) + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num, rmap), + ®val) < 0) return -EIO; } - if (regval & RIO_PORT_N_CTL_P_TYP_SER) { - /* serial */ - regval = regval | RIO_PORT_N_CTL_EN_RX_SER - | RIO_PORT_N_CTL_EN_TX_SER; - } else { - /* parallel */ - regval = regval | RIO_PORT_N_CTL_EN_RX_PAR - | RIO_PORT_N_CTL_EN_TX_PAR; - } + regval = regval | RIO_PORT_N_CTL_EN_RX | RIO_PORT_N_CTL_EN_TX; if (local) { - rio_local_write_config_32(port, ext_ftr_ptr + - RIO_PORT_N_CTL_CSR(0), regval); + rio_local_write_config_32(port, + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(0, rmap), regval); } else { if (rio_mport_write_config_32(port, destid, hopcount, - ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), regval) < 0) + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num, rmap), + regval) < 0) return -EIO; } #endif @@ -1012,14 +1046,14 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) /* Read from link maintenance response register * to clear valid bit */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, pnum), ®val); udelay(50); } /* Issue Input-status command */ rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(pnum), + RIO_DEV_PORT_N_MNT_REQ_CSR(rdev, pnum), RIO_MNT_REQ_CMD_IS); /* Exit if the response is not expected */ @@ -1030,7 +1064,7 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) while (checkcount--) { udelay(50); rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, pnum), ®val); if (regval & RIO_PORT_N_MNT_RSP_RVAL) { *lnkresp = regval; @@ -1046,6 +1080,13 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) * @rdev: Pointer to RIO device control structure * @pnum: Switch port number to clear errors * @err_status: port error status (if 0 reads register from device) + * + * TODO: Currently this routine is not compatible with recovery process + * specified for idt_gen3 RapidIO switch devices. It has to be reviewed + * to implement universal recovery process that is compatible full range + * off available devices. + * IDT gen3 switch driver now implements HW-specific error handler that + * issues soft port reset to the port to reset ERR_STOP bits and ackIDs. */ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) { @@ -1055,10 +1096,10 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) if (err_status == 0) rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), &err_status); - if (err_status & RIO_PORT_N_ERR_STS_PW_OUT_ES) { + if (err_status & RIO_PORT_N_ERR_STS_OUT_ES) { pr_debug("RIO_EM: servicing Output Error-Stopped state\n"); /* * Send a Link-Request/Input-Status control symbol @@ -1073,7 +1114,7 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) far_ackid = (regval & RIO_PORT_N_MNT_RSP_ASTAT) >> 5; far_linkstat = regval & RIO_PORT_N_MNT_RSP_LSTAT; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, pnum), ®val); pr_debug("RIO_EM: SP%d_ACK_STS_CSR=0x%08x\n", pnum, regval); near_ackid = (regval & RIO_PORT_N_ACK_INBOUND) >> 24; @@ -1091,43 +1132,43 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) * far inbound. */ rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, pnum), (near_ackid << 24) | (far_ackid << 8) | far_ackid); /* Align far outstanding/outbound ackIDs with * near inbound. */ far_ackid++; - if (nextdev) - rio_write_config_32(nextdev, - nextdev->phys_efptr + - RIO_PORT_N_ACK_STS_CSR(RIO_GET_PORT_NUM(nextdev->swpinfo)), - (far_ackid << 24) | - (near_ackid << 8) | near_ackid); - else - pr_debug("RIO_EM: Invalid nextdev pointer (NULL)\n"); + if (!nextdev) { + pr_debug("RIO_EM: nextdev pointer == NULL\n"); + goto rd_err; + } + + rio_write_config_32(nextdev, + RIO_DEV_PORT_N_ACK_STS_CSR(nextdev, + RIO_GET_PORT_NUM(nextdev->swpinfo)), + (far_ackid << 24) | + (near_ackid << 8) | near_ackid); } rd_err: - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); } - if ((err_status & RIO_PORT_N_ERR_STS_PW_INP_ES) && nextdev) { + if ((err_status & RIO_PORT_N_ERR_STS_INP_ES) && nextdev) { pr_debug("RIO_EM: servicing Input Error-Stopped state\n"); rio_get_input_status(nextdev, RIO_GET_PORT_NUM(nextdev->swpinfo), NULL); udelay(50); - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); } - return (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES)) ? 1 : 0; + return (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) ? 1 : 0; } /** @@ -1227,9 +1268,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) if (rdev->rswitch->ops && rdev->rswitch->ops->em_handle) rdev->rswitch->ops->em_handle(rdev, portnum); - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), + &err_status); pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status); if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { @@ -1246,8 +1286,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) * Depending on the link partner state, two attempts * may be needed for successful recovery. */ - if (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES)) { + if (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) { if (rio_clr_err_stopped(rdev, portnum, err_status)) rio_clr_err_stopped(rdev, portnum, 0); } @@ -1257,10 +1297,18 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) rdev->rswitch->port_ok &= ~(1 << portnum); rio_set_port_lockout(rdev, portnum, 1); + if (rdev->phys_rmap == 1) { rio_write_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ACK_STS_CSR(portnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, portnum), RIO_PORT_N_ACK_CLEAR); + } else { + rio_write_config_32(rdev, + RIO_DEV_PORT_N_OB_ACK_CSR(rdev, portnum), + RIO_PORT_N_OB_ACK_CLEAR); + rio_write_config_32(rdev, + RIO_DEV_PORT_N_IB_ACK_CSR(rdev, portnum), + 0); + } /* Schedule Extraction Service */ pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n", @@ -1289,9 +1337,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) } /* Clear remaining error bits and Port-Write Pending bit */ - rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status); + rio_write_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), + err_status); return 0; } @@ -1342,20 +1389,7 @@ EXPORT_SYMBOL_GPL(rio_mport_get_efb); * Tell if a device supports a given RapidIO capability. * Returns the offset of the requested extended feature * block within the device's RIO configuration space or - * 0 in case the device does not support it. Possible - * values for @ftr: - * - * %RIO_EFB_PAR_EP_ID LP/LVDS EP Devices - * - * %RIO_EFB_PAR_EP_REC_ID LP/LVDS EP Recovery Devices - * - * %RIO_EFB_PAR_EP_FREE_ID LP/LVDS EP Free Devices - * - * %RIO_EFB_SER_EP_ID LP/Serial EP Devices - * - * %RIO_EFB_SER_EP_REC_ID LP/Serial EP Recovery Devices - * - * %RIO_EFB_SER_EP_FREE_ID LP/Serial EP Free Devices + * 0 in case the device does not support it. */ u32 rio_mport_get_feature(struct rio_mport * port, int local, u16 destid, @@ -1848,7 +1882,9 @@ EXPORT_SYMBOL_GPL(rio_release_dma); * Initializes RapidIO capable DMA channel for the specified data transfer. * Uses DMA channel private extension to pass information related to remote * target RIO device. - * Returns pointer to DMA transaction descriptor or NULL if failed. + * + * Returns: pointer to DMA transaction descriptor if successful, + * error-valued pointer or NULL if failed. */ struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan, u16 destid, struct rio_dma_data *data, @@ -1883,7 +1919,9 @@ EXPORT_SYMBOL_GPL(rio_dma_prep_xfer); * Initializes RapidIO capable DMA channel for the specified data transfer. * Uses DMA channel private extension to pass information related to remote * target RIO device. - * Returns pointer to DMA transaction descriptor or NULL if failed. + * + * Returns: pointer to DMA transaction descriptor if successful, + * error-valued pointer or NULL if failed. */ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, struct dma_chan *dchan, struct rio_dma_data *data, diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 625d09add001..9796b3fee70d 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -22,7 +22,7 @@ extern u32 rio_mport_get_feature(struct rio_mport *mport, int local, u16 destid, u8 hopcount, int ftr); extern u32 rio_mport_get_physefb(struct rio_mport *port, int local, - u16 destid, u8 hopcount); + u16 destid, u8 hopcount, u32 *rmap); extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid, u8 hopcount, u32 from); extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c new file mode 100644 index 000000000000..cecc15a880de --- /dev/null +++ b/drivers/rapidio/rio_cm.c @@ -0,0 +1,2366 @@ +/* + * rio_cm - RapidIO Channelized Messaging Driver + * + * Copyright 2013-2016 Integrated Device Technology, Inc. + * Copyright (c) 2015, Prodrive Technologies + * Copyright (c) 2015, RapidIO Trade Association + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, + * BUT WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE + * GNU GENERAL PUBLIC LICENSE FOR MORE DETAILS. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/rio.h> +#include <linux/rio_drv.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/reboot.h> +#include <linux/bitops.h> +#include <linux/printk.h> +#include <linux/rio_cm_cdev.h> + +#define DRV_NAME "rio_cm" +#define DRV_VERSION "1.0.0" +#define DRV_AUTHOR "Alexandre Bounine <alexandre.bounine@idt.com>" +#define DRV_DESC "RapidIO Channelized Messaging Driver" +#define DEV_NAME "rio_cm" + +/* Debug output filtering masks */ +enum { + DBG_NONE = 0, + DBG_INIT = BIT(0), /* driver init */ + DBG_EXIT = BIT(1), /* driver exit */ + DBG_MPORT = BIT(2), /* mport add/remove */ + DBG_RDEV = BIT(3), /* RapidIO device add/remove */ + DBG_CHOP = BIT(4), /* channel operations */ + DBG_WAIT = BIT(5), /* waiting for events */ + DBG_TX = BIT(6), /* message TX */ + DBG_TX_EVENT = BIT(7), /* message TX event */ + DBG_RX_DATA = BIT(8), /* inbound data messages */ + DBG_RX_CMD = BIT(9), /* inbound REQ/ACK/NACK messages */ + DBG_ALL = ~0, +}; + +#ifdef DEBUG +#define riocm_debug(level, fmt, arg...) \ + do { \ + if (DBG_##level & dbg_level) \ + pr_debug(DRV_NAME ": %s " fmt "\n", \ + __func__, ##arg); \ + } while (0) +#else +#define riocm_debug(level, fmt, arg...) \ + no_printk(KERN_DEBUG pr_fmt(DRV_NAME fmt "\n"), ##arg) +#endif + +#define riocm_warn(fmt, arg...) \ + pr_warn(DRV_NAME ": %s WARNING " fmt "\n", __func__, ##arg) + +#define riocm_error(fmt, arg...) \ + pr_err(DRV_NAME ": %s ERROR " fmt "\n", __func__, ##arg) + + +static int cmbox = 1; +module_param(cmbox, int, S_IRUGO); +MODULE_PARM_DESC(cmbox, "RapidIO Mailbox number (default 1)"); + +static int chstart = 256; +module_param(chstart, int, S_IRUGO); +MODULE_PARM_DESC(chstart, + "Start channel number for dynamic allocation (default 256)"); + +#ifdef DEBUG +static u32 dbg_level = DBG_NONE; +module_param(dbg_level, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); +#endif + +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +#define RIOCM_TX_RING_SIZE 128 +#define RIOCM_RX_RING_SIZE 128 +#define RIOCM_CONNECT_TO 3 /* connect response TO (in sec) */ + +#define RIOCM_MAX_CHNUM 0xffff /* Use full range of u16 field */ +#define RIOCM_CHNUM_AUTO 0 +#define RIOCM_MAX_EP_COUNT 0x10000 /* Max number of endpoints */ + +enum rio_cm_state { + RIO_CM_IDLE, + RIO_CM_CONNECT, + RIO_CM_CONNECTED, + RIO_CM_DISCONNECT, + RIO_CM_CHAN_BOUND, + RIO_CM_LISTEN, + RIO_CM_DESTROYING, +}; + +enum rio_cm_pkt_type { + RIO_CM_SYS = 0xaa, + RIO_CM_CHAN = 0x55, +}; + +enum rio_cm_chop { + CM_CONN_REQ, + CM_CONN_ACK, + CM_CONN_CLOSE, + CM_DATA_MSG, +}; + +struct rio_ch_base_bhdr { + u32 src_id; + u32 dst_id; +#define RIO_HDR_LETTER_MASK 0xffff0000 +#define RIO_HDR_MBOX_MASK 0x0000ffff + u8 src_mbox; + u8 dst_mbox; + u8 type; +} __attribute__((__packed__)); + +struct rio_ch_chan_hdr { + struct rio_ch_base_bhdr bhdr; + u8 ch_op; + u16 dst_ch; + u16 src_ch; + u16 msg_len; + u16 rsrvd; +} __attribute__((__packed__)); + +struct tx_req { + struct list_head node; + struct rio_dev *rdev; + void *buffer; + size_t len; +}; + +struct cm_dev { + struct list_head list; + struct rio_mport *mport; + void *rx_buf[RIOCM_RX_RING_SIZE]; + int rx_slots; + struct mutex rx_lock; + + void *tx_buf[RIOCM_TX_RING_SIZE]; + int tx_slot; + int tx_cnt; + int tx_ack_slot; + struct list_head tx_reqs; + spinlock_t tx_lock; + + struct list_head peers; + u32 npeers; + struct workqueue_struct *rx_wq; + struct work_struct rx_work; +}; + +struct chan_rx_ring { + void *buf[RIOCM_RX_RING_SIZE]; + int head; + int tail; + int count; + + /* Tracking RX buffers reported to upper level */ + void *inuse[RIOCM_RX_RING_SIZE]; + int inuse_cnt; +}; + +struct rio_channel { + u16 id; /* local channel ID */ + struct kref ref; /* channel refcount */ + struct file *filp; + struct cm_dev *cmdev; /* associated CM device object */ + struct rio_dev *rdev; /* remote RapidIO device */ + enum rio_cm_state state; + int error; + spinlock_t lock; + void *context; + u32 loc_destid; /* local destID */ + u32 rem_destid; /* remote destID */ + u16 rem_channel; /* remote channel ID */ + struct list_head accept_queue; + struct list_head ch_node; + struct completion comp; + struct completion comp_close; + struct chan_rx_ring rx_ring; +}; + +struct cm_peer { + struct list_head node; + struct rio_dev *rdev; +}; + +struct rio_cm_work { + struct work_struct work; + struct cm_dev *cm; + void *data; +}; + +struct conn_req { + struct list_head node; + u32 destid; /* requester destID */ + u16 chan; /* requester channel ID */ + struct cm_dev *cmdev; +}; + +/* + * A channel_dev structure represents a CM_CDEV + * @cdev Character device + * @dev Associated device object + */ +struct channel_dev { + struct cdev cdev; + struct device *dev; +}; + +static struct rio_channel *riocm_ch_alloc(u16 ch_num); +static void riocm_ch_free(struct kref *ref); +static int riocm_post_send(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len); +static int riocm_ch_close(struct rio_channel *ch); + +static DEFINE_SPINLOCK(idr_lock); +static DEFINE_IDR(ch_idr); + +static LIST_HEAD(cm_dev_list); +static DECLARE_RWSEM(rdev_sem); + +static struct class *dev_class; +static unsigned int dev_major; +static unsigned int dev_minor_base; +static dev_t dev_number; +static struct channel_dev riocm_cdev; + +#define is_msg_capable(src_ops, dst_ops) \ + ((src_ops & RIO_SRC_OPS_DATA_MSG) && \ + (dst_ops & RIO_DST_OPS_DATA_MSG)) +#define dev_cm_capable(dev) \ + is_msg_capable(dev->src_ops, dev->dst_ops) + +static int riocm_cmp(struct rio_channel *ch, enum rio_cm_state cmp) +{ + int ret; + + spin_lock_bh(&ch->lock); + ret = (ch->state == cmp); + spin_unlock_bh(&ch->lock); + return ret; +} + +static int riocm_cmp_exch(struct rio_channel *ch, + enum rio_cm_state cmp, enum rio_cm_state exch) +{ + int ret; + + spin_lock_bh(&ch->lock); + ret = (ch->state == cmp); + if (ret) + ch->state = exch; + spin_unlock_bh(&ch->lock); + return ret; +} + +static enum rio_cm_state riocm_exch(struct rio_channel *ch, + enum rio_cm_state exch) +{ + enum rio_cm_state old; + + spin_lock_bh(&ch->lock); + old = ch->state; + ch->state = exch; + spin_unlock_bh(&ch->lock); + return old; +} + +static struct rio_channel *riocm_get_channel(u16 nr) +{ + struct rio_channel *ch; + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, nr); + if (ch) + kref_get(&ch->ref); + spin_unlock_bh(&idr_lock); + return ch; +} + +static void riocm_put_channel(struct rio_channel *ch) +{ + kref_put(&ch->ref, riocm_ch_free); +} + +static void *riocm_rx_get_msg(struct cm_dev *cm) +{ + void *msg; + int i; + + msg = rio_get_inb_message(cm->mport, cmbox); + if (msg) { + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (cm->rx_buf[i] == msg) { + cm->rx_buf[i] = NULL; + cm->rx_slots++; + break; + } + } + + if (i == RIOCM_RX_RING_SIZE) + riocm_warn("no record for buffer 0x%p", msg); + } + + return msg; +} + +/* + * riocm_rx_fill - fills a ring of receive buffers for given cm device + * @cm: cm_dev object + * @nent: max number of entries to fill + * + * Returns: none + */ +static void riocm_rx_fill(struct cm_dev *cm, int nent) +{ + int i; + + if (cm->rx_slots == 0) + return; + + for (i = 0; i < RIOCM_RX_RING_SIZE && cm->rx_slots && nent; i++) { + if (cm->rx_buf[i] == NULL) { + cm->rx_buf[i] = kmalloc(RIO_MAX_MSG_SIZE, GFP_KERNEL); + if (cm->rx_buf[i] == NULL) + break; + rio_add_inb_buffer(cm->mport, cmbox, cm->rx_buf[i]); + cm->rx_slots--; + nent--; + } + } +} + +/* + * riocm_rx_free - frees all receive buffers associated with given cm device + * @cm: cm_dev object + * + * Returns: none + */ +static void riocm_rx_free(struct cm_dev *cm) +{ + int i; + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (cm->rx_buf[i] != NULL) { + kfree(cm->rx_buf[i]); + cm->rx_buf[i] = NULL; + } + } +} + +/* + * riocm_req_handler - connection request handler + * @cm: cm_dev object + * @req_data: pointer to the request packet + * + * Returns: 0 if success, or + * -EINVAL if channel is not in correct state, + * -ENODEV if cannot find a channel with specified ID, + * -ENOMEM if unable to allocate memory to store the request + */ +static int riocm_req_handler(struct cm_dev *cm, void *req_data) +{ + struct rio_channel *ch; + struct conn_req *req; + struct rio_ch_chan_hdr *hh = req_data; + u16 chnum; + + chnum = ntohs(hh->dst_ch); + + ch = riocm_get_channel(chnum); + + if (!ch) + return -ENODEV; + + if (ch->state != RIO_CM_LISTEN) { + riocm_debug(RX_CMD, "channel %d is not in listen state", chnum); + riocm_put_channel(ch); + return -EINVAL; + } + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + riocm_put_channel(ch); + return -ENOMEM; + } + + req->destid = ntohl(hh->bhdr.src_id); + req->chan = ntohs(hh->src_ch); + req->cmdev = cm; + + spin_lock_bh(&ch->lock); + list_add_tail(&req->node, &ch->accept_queue); + spin_unlock_bh(&ch->lock); + complete(&ch->comp); + riocm_put_channel(ch); + + return 0; +} + +/* + * riocm_resp_handler - response to connection request handler + * @resp_data: pointer to the response packet + * + * Returns: 0 if success, or + * -EINVAL if channel is not in correct state, + * -ENODEV if cannot find a channel with specified ID, + */ +static int riocm_resp_handler(void *resp_data) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hh = resp_data; + u16 chnum; + + chnum = ntohs(hh->dst_ch); + ch = riocm_get_channel(chnum); + if (!ch) + return -ENODEV; + + if (ch->state != RIO_CM_CONNECT) { + riocm_put_channel(ch); + return -EINVAL; + } + + riocm_exch(ch, RIO_CM_CONNECTED); + ch->rem_channel = ntohs(hh->src_ch); + complete(&ch->comp); + riocm_put_channel(ch); + + return 0; +} + +/* + * riocm_close_handler - channel close request handler + * @req_data: pointer to the request packet + * + * Returns: 0 if success, or + * -ENODEV if cannot find a channel with specified ID, + * + error codes returned by riocm_ch_close. + */ +static int riocm_close_handler(void *data) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hh = data; + int ret; + + riocm_debug(RX_CMD, "for ch=%d", ntohs(hh->dst_ch)); + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, ntohs(hh->dst_ch)); + if (!ch) { + spin_unlock_bh(&idr_lock); + return -ENODEV; + } + idr_remove(&ch_idr, ch->id); + spin_unlock_bh(&idr_lock); + + riocm_exch(ch, RIO_CM_DISCONNECT); + + ret = riocm_ch_close(ch); + if (ret) + riocm_debug(RX_CMD, "riocm_ch_close() returned %d", ret); + + return 0; +} + +/* + * rio_cm_handler - function that services request (non-data) packets + * @cm: cm_dev object + * @data: pointer to the packet + */ +static void rio_cm_handler(struct cm_dev *cm, void *data) +{ + struct rio_ch_chan_hdr *hdr; + + if (!rio_mport_is_running(cm->mport)) + goto out; + + hdr = data; + + riocm_debug(RX_CMD, "OP=%x for ch=%d from %d", + hdr->ch_op, ntohs(hdr->dst_ch), ntohs(hdr->src_ch)); + + switch (hdr->ch_op) { + case CM_CONN_REQ: + riocm_req_handler(cm, data); + break; + case CM_CONN_ACK: + riocm_resp_handler(data); + break; + case CM_CONN_CLOSE: + riocm_close_handler(data); + break; + default: + riocm_error("Invalid packet header"); + break; + } +out: + kfree(data); +} + +/* + * rio_rx_data_handler - received data packet handler + * @cm: cm_dev object + * @buf: data packet + * + * Returns: 0 if success, or + * -ENODEV if cannot find a channel with specified ID, + * -EIO if channel is not in CONNECTED state, + * -ENOMEM if channel RX queue is full (packet discarded) + */ +static int rio_rx_data_handler(struct cm_dev *cm, void *buf) +{ + struct rio_ch_chan_hdr *hdr; + struct rio_channel *ch; + + hdr = buf; + + riocm_debug(RX_DATA, "for ch=%d", ntohs(hdr->dst_ch)); + + ch = riocm_get_channel(ntohs(hdr->dst_ch)); + if (!ch) { + /* Discard data message for non-existing channel */ + kfree(buf); + return -ENODEV; + } + + /* Place pointer to the buffer into channel's RX queue */ + spin_lock(&ch->lock); + + if (ch->state != RIO_CM_CONNECTED) { + /* Channel is not ready to receive data, discard a packet */ + riocm_debug(RX_DATA, "ch=%d is in wrong state=%d", + ch->id, ch->state); + spin_unlock(&ch->lock); + kfree(buf); + riocm_put_channel(ch); + return -EIO; + } + + if (ch->rx_ring.count == RIOCM_RX_RING_SIZE) { + /* If RX ring is full, discard a packet */ + riocm_debug(RX_DATA, "ch=%d is full", ch->id); + spin_unlock(&ch->lock); + kfree(buf); + riocm_put_channel(ch); + return -ENOMEM; + } + + ch->rx_ring.buf[ch->rx_ring.head] = buf; + ch->rx_ring.head++; + ch->rx_ring.count++; + ch->rx_ring.head %= RIOCM_RX_RING_SIZE; + + complete(&ch->comp); + + spin_unlock(&ch->lock); + riocm_put_channel(ch); + + return 0; +} + +/* + * rio_ibmsg_handler - inbound message packet handler + */ +static void rio_ibmsg_handler(struct work_struct *work) +{ + struct cm_dev *cm = container_of(work, struct cm_dev, rx_work); + void *data; + struct rio_ch_chan_hdr *hdr; + + if (!rio_mport_is_running(cm->mport)) + return; + + while (1) { + mutex_lock(&cm->rx_lock); + data = riocm_rx_get_msg(cm); + if (data) + riocm_rx_fill(cm, 1); + mutex_unlock(&cm->rx_lock); + + if (data == NULL) + break; + + hdr = data; + + if (hdr->bhdr.type != RIO_CM_CHAN) { + /* For now simply discard packets other than channel */ + riocm_error("Unsupported TYPE code (0x%x). Msg dropped", + hdr->bhdr.type); + kfree(data); + continue; + } + + /* Process a channel message */ + if (hdr->ch_op == CM_DATA_MSG) + rio_rx_data_handler(cm, data); + else + rio_cm_handler(cm, data); + } +} + +static void riocm_inb_msg_event(struct rio_mport *mport, void *dev_id, + int mbox, int slot) +{ + struct cm_dev *cm = dev_id; + + if (rio_mport_is_running(cm->mport) && !work_pending(&cm->rx_work)) + queue_work(cm->rx_wq, &cm->rx_work); +} + +/* + * rio_txcq_handler - TX completion handler + * @cm: cm_dev object + * @slot: TX queue slot + * + * TX completion handler also ensures that pending request packets are placed + * into transmit queue as soon as a free slot becomes available. This is done + * to give higher priority to request packets during high intensity data flow. + */ +static void rio_txcq_handler(struct cm_dev *cm, int slot) +{ + int ack_slot; + + /* ATTN: Add TX completion notification if/when direct buffer + * transfer is implemented. At this moment only correct tracking + * of tx_count is important. + */ + riocm_debug(TX_EVENT, "for mport_%d slot %d tx_cnt %d", + cm->mport->id, slot, cm->tx_cnt); + + spin_lock(&cm->tx_lock); + ack_slot = cm->tx_ack_slot; + + if (ack_slot == slot) + riocm_debug(TX_EVENT, "slot == ack_slot"); + + while (cm->tx_cnt && ((ack_slot != slot) || + (cm->tx_cnt == RIOCM_TX_RING_SIZE))) { + + cm->tx_buf[ack_slot] = NULL; + ++ack_slot; + ack_slot &= (RIOCM_TX_RING_SIZE - 1); + cm->tx_cnt--; + } + + if (cm->tx_cnt < 0 || cm->tx_cnt > RIOCM_TX_RING_SIZE) + riocm_error("tx_cnt %d out of sync", cm->tx_cnt); + + WARN_ON((cm->tx_cnt < 0) || (cm->tx_cnt > RIOCM_TX_RING_SIZE)); + + cm->tx_ack_slot = ack_slot; + + /* + * If there are pending requests, insert them into transmit queue + */ + if (!list_empty(&cm->tx_reqs) && (cm->tx_cnt < RIOCM_TX_RING_SIZE)) { + struct tx_req *req, *_req; + int rc; + + list_for_each_entry_safe(req, _req, &cm->tx_reqs, node) { + list_del(&req->node); + cm->tx_buf[cm->tx_slot] = req->buffer; + rc = rio_add_outb_message(cm->mport, req->rdev, cmbox, + req->buffer, req->len); + kfree(req->buffer); + kfree(req); + + ++cm->tx_cnt; + ++cm->tx_slot; + cm->tx_slot &= (RIOCM_TX_RING_SIZE - 1); + if (cm->tx_cnt == RIOCM_TX_RING_SIZE) + break; + } + } + + spin_unlock(&cm->tx_lock); +} + +static void riocm_outb_msg_event(struct rio_mport *mport, void *dev_id, + int mbox, int slot) +{ + struct cm_dev *cm = dev_id; + + if (cm && rio_mport_is_running(cm->mport)) + rio_txcq_handler(cm, slot); +} + +static int riocm_queue_req(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len) +{ + unsigned long flags; + struct tx_req *treq; + + treq = kzalloc(sizeof(*treq), GFP_KERNEL); + if (treq == NULL) + return -ENOMEM; + + treq->rdev = rdev; + treq->buffer = buffer; + treq->len = len; + + spin_lock_irqsave(&cm->tx_lock, flags); + list_add_tail(&treq->node, &cm->tx_reqs); + spin_unlock_irqrestore(&cm->tx_lock, flags); + return 0; +} + +/* + * riocm_post_send - helper function that places packet into msg TX queue + * @cm: cm_dev object + * @rdev: target RapidIO device object (required by outbound msg interface) + * @buffer: pointer to a packet buffer to send + * @len: length of data to transfer + * @req: request priority flag + * + * Returns: 0 if success, or error code otherwise. + */ +static int riocm_post_send(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&cm->tx_lock, flags); + + if (cm->mport == NULL) { + rc = -ENODEV; + goto err_out; + } + + if (cm->tx_cnt == RIOCM_TX_RING_SIZE) { + riocm_debug(TX, "Tx Queue is full"); + rc = -EBUSY; + goto err_out; + } + + cm->tx_buf[cm->tx_slot] = buffer; + rc = rio_add_outb_message(cm->mport, rdev, cmbox, buffer, len); + + riocm_debug(TX, "Add buf@%p destid=%x tx_slot=%d tx_cnt=%d", + buffer, rdev->destid, cm->tx_slot, cm->tx_cnt); + + ++cm->tx_cnt; + ++cm->tx_slot; + cm->tx_slot &= (RIOCM_TX_RING_SIZE - 1); + +err_out: + spin_unlock_irqrestore(&cm->tx_lock, flags); + return rc; +} + +/* + * riocm_ch_send - sends a data packet to a remote device + * @ch_id: local channel ID + * @buf: pointer to a data buffer to send (including CM header) + * @len: length of data to transfer (including CM header) + * + * ATTN: ASSUMES THAT THE HEADER SPACE IS RESERVED PART OF THE DATA PACKET + * + * Returns: 0 if success, or + * -EINVAL if one or more input parameters is/are not valid, + * -ENODEV if cannot find a channel with specified ID, + * -EAGAIN if a channel is not in CONNECTED state, + * + error codes returned by HW send routine. + */ +static int riocm_ch_send(u16 ch_id, void *buf, int len) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hdr; + int ret; + + if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) + return -EINVAL; + + ch = riocm_get_channel(ch_id); + if (!ch) { + riocm_error("%s(%d) ch_%d not found", current->comm, + task_pid_nr(current), ch_id); + return -ENODEV; + } + + if (!riocm_cmp(ch, RIO_CM_CONNECTED)) { + ret = -EAGAIN; + goto err_out; + } + + /* + * Fill buffer header section with corresponding channel data + */ + hdr = buf; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_DATA_MSG; + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + hdr->msg_len = htons((u16)len); + + /* ATTN: the function call below relies on the fact that underlying + * HW-specific add_outb_message() routine copies TX data into its own + * internal transfer buffer (true for all RIONET compatible mport + * drivers). Must be reviewed if mport driver uses the buffer directly. + */ + + ret = riocm_post_send(ch->cmdev, ch->rdev, buf, len); + if (ret) + riocm_debug(TX, "ch %d send_err=%d", ch->id, ret); +err_out: + riocm_put_channel(ch); + return ret; +} + +static int riocm_ch_free_rxbuf(struct rio_channel *ch, void *buf) +{ + int i, ret = -EINVAL; + + spin_lock_bh(&ch->lock); + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (ch->rx_ring.inuse[i] == buf) { + ch->rx_ring.inuse[i] = NULL; + ch->rx_ring.inuse_cnt--; + ret = 0; + break; + } + } + + spin_unlock_bh(&ch->lock); + + if (!ret) + kfree(buf); + + return ret; +} + +/* + * riocm_ch_receive - fetch a data packet received for the specified channel + * @ch: local channel ID + * @buf: pointer to a packet buffer + * @timeout: timeout to wait for incoming packet (in jiffies) + * + * Returns: 0 and valid buffer pointer if success, or NULL pointer and one of: + * -EAGAIN if a channel is not in CONNECTED state, + * -ENOMEM if in-use tracking queue is full, + * -ETIME if wait timeout expired, + * -EINTR if wait was interrupted. + */ +static int riocm_ch_receive(struct rio_channel *ch, void **buf, long timeout) +{ + void *rxmsg = NULL; + int i, ret = 0; + long wret; + + if (!riocm_cmp(ch, RIO_CM_CONNECTED)) { + ret = -EAGAIN; + goto out; + } + + if (ch->rx_ring.inuse_cnt == RIOCM_RX_RING_SIZE) { + /* If we do not have entries to track buffers given to upper + * layer, reject request. + */ + ret = -ENOMEM; + goto out; + } + + wret = wait_for_completion_interruptible_timeout(&ch->comp, timeout); + + riocm_debug(WAIT, "wait on %d returned %ld", ch->id, wret); + + if (!wret) + ret = -ETIME; + else if (wret == -ERESTARTSYS) + ret = -EINTR; + else + ret = riocm_cmp(ch, RIO_CM_CONNECTED) ? 0 : -ECONNRESET; + + if (ret) + goto out; + + spin_lock_bh(&ch->lock); + + rxmsg = ch->rx_ring.buf[ch->rx_ring.tail]; + ch->rx_ring.buf[ch->rx_ring.tail] = NULL; + ch->rx_ring.count--; + ch->rx_ring.tail++; + ch->rx_ring.tail %= RIOCM_RX_RING_SIZE; + ret = -ENOMEM; + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (ch->rx_ring.inuse[i] == NULL) { + ch->rx_ring.inuse[i] = rxmsg; + ch->rx_ring.inuse_cnt++; + ret = 0; + break; + } + } + + if (ret) { + /* We have no entry to store pending message: drop it */ + kfree(rxmsg); + rxmsg = NULL; + } + + spin_unlock_bh(&ch->lock); +out: + *buf = rxmsg; + return ret; +} + +/* + * riocm_ch_connect - sends a connect request to a remote device + * @loc_ch: local channel ID + * @cm: CM device to send connect request + * @peer: target RapidIO device + * @rem_ch: remote channel ID + * + * Returns: 0 if success, or + * -EINVAL if the channel is not in IDLE state, + * -EAGAIN if no connection request available immediately, + * -ETIME if ACK response timeout expired, + * -EINTR if wait for response was interrupted. + */ +static int riocm_ch_connect(u16 loc_ch, struct cm_dev *cm, + struct cm_peer *peer, u16 rem_ch) +{ + struct rio_channel *ch = NULL; + struct rio_ch_chan_hdr *hdr; + int ret; + long wret; + + ch = riocm_get_channel(loc_ch); + if (!ch) + return -ENODEV; + + if (!riocm_cmp_exch(ch, RIO_CM_IDLE, RIO_CM_CONNECT)) { + ret = -EINVAL; + goto conn_done; + } + + ch->cmdev = cm; + ch->rdev = peer->rdev; + ch->context = NULL; + ch->loc_destid = cm->mport->host_deviceid; + ch->rem_channel = rem_ch; + + /* + * Send connect request to the remote RapidIO device + */ + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) { + ret = -ENOMEM; + goto conn_done; + } + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(peer->rdev->destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_REQ; + hdr->dst_ch = htons(rem_ch); + hdr->src_ch = htons(loc_ch); + + /* ATTN: the function call below relies on the fact that underlying + * HW-specific add_outb_message() routine copies TX data into its + * internal transfer buffer. Must be reviewed if mport driver uses + * this buffer directly. + */ + ret = riocm_post_send(cm, peer->rdev, hdr, sizeof(*hdr)); + + if (ret != -EBUSY) { + kfree(hdr); + } else { + ret = riocm_queue_req(cm, peer->rdev, hdr, sizeof(*hdr)); + if (ret) + kfree(hdr); + } + + if (ret) { + riocm_cmp_exch(ch, RIO_CM_CONNECT, RIO_CM_IDLE); + goto conn_done; + } + + /* Wait for connect response from the remote device */ + wret = wait_for_completion_interruptible_timeout(&ch->comp, + RIOCM_CONNECT_TO * HZ); + riocm_debug(WAIT, "wait on %d returns %ld", ch->id, wret); + + if (!wret) + ret = -ETIME; + else if (wret == -ERESTARTSYS) + ret = -EINTR; + else + ret = riocm_cmp(ch, RIO_CM_CONNECTED) ? 0 : -1; + +conn_done: + riocm_put_channel(ch); + return ret; +} + +static int riocm_send_ack(struct rio_channel *ch) +{ + struct rio_ch_chan_hdr *hdr; + int ret; + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) + return -ENOMEM; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_ACK; + + /* ATTN: the function call below relies on the fact that underlying + * add_outb_message() routine copies TX data into its internal transfer + * buffer. Review if switching to direct buffer version. + */ + ret = riocm_post_send(ch->cmdev, ch->rdev, hdr, sizeof(*hdr)); + + if (ret == -EBUSY && !riocm_queue_req(ch->cmdev, + ch->rdev, hdr, sizeof(*hdr))) + return 0; + kfree(hdr); + + if (ret) + riocm_error("send ACK to ch_%d on %s failed (ret=%d)", + ch->id, rio_name(ch->rdev), ret); + return ret; +} + +/* + * riocm_ch_accept - accept incoming connection request + * @ch_id: channel ID + * @new_ch_id: local mport device + * @timeout: wait timeout (if 0 non-blocking call, do not wait if connection + * request is not available). + * + * Returns: pointer to new channel struct if success, or error-valued pointer: + * -ENODEV - cannot find specified channel or mport, + * -EINVAL - the channel is not in IDLE state, + * -EAGAIN - no connection request available immediately (timeout=0), + * -ENOMEM - unable to allocate new channel, + * -ETIME - wait timeout expired, + * -EINTR - wait was interrupted. + */ +static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, + long timeout) +{ + struct rio_channel *ch = NULL; + struct rio_channel *new_ch = NULL; + struct conn_req *req; + struct cm_peer *peer; + int found = 0; + int err = 0; + long wret; + + ch = riocm_get_channel(ch_id); + if (!ch) + return ERR_PTR(-EINVAL); + + if (!riocm_cmp(ch, RIO_CM_LISTEN)) { + err = -EINVAL; + goto err_put; + } + + /* Don't sleep if this is a non blocking call */ + if (!timeout) { + if (!try_wait_for_completion(&ch->comp)) { + err = -EAGAIN; + goto err_put; + } + } else { + riocm_debug(WAIT, "on %d", ch->id); + + wret = wait_for_completion_interruptible_timeout(&ch->comp, + timeout); + if (!wret) { + err = -ETIME; + goto err_put; + } else if (wret == -ERESTARTSYS) { + err = -EINTR; + goto err_put; + } + } + + spin_lock_bh(&ch->lock); + + if (ch->state != RIO_CM_LISTEN) { + err = -ECANCELED; + } else if (list_empty(&ch->accept_queue)) { + riocm_debug(WAIT, "on %d accept_queue is empty on completion", + ch->id); + err = -EIO; + } + + spin_unlock_bh(&ch->lock); + + if (err) { + riocm_debug(WAIT, "on %d returns %d", ch->id, err); + goto err_put; + } + + /* Create new channel for this connection */ + new_ch = riocm_ch_alloc(RIOCM_CHNUM_AUTO); + + if (IS_ERR(new_ch)) { + riocm_error("failed to get channel for new req (%ld)", + PTR_ERR(new_ch)); + err = -ENOMEM; + goto err_put; + } + + spin_lock_bh(&ch->lock); + + req = list_first_entry(&ch->accept_queue, struct conn_req, node); + list_del(&req->node); + new_ch->cmdev = ch->cmdev; + new_ch->loc_destid = ch->loc_destid; + new_ch->rem_destid = req->destid; + new_ch->rem_channel = req->chan; + + spin_unlock_bh(&ch->lock); + riocm_put_channel(ch); + kfree(req); + + down_read(&rdev_sem); + /* Find requester's device object */ + list_for_each_entry(peer, &new_ch->cmdev->peers, node) { + if (peer->rdev->destid == new_ch->rem_destid) { + riocm_debug(RX_CMD, "found matching device(%s)", + rio_name(peer->rdev)); + found = 1; + break; + } + } + up_read(&rdev_sem); + + if (!found) { + /* If peer device object not found, simply ignore the request */ + err = -ENODEV; + goto err_nodev; + } + + new_ch->rdev = peer->rdev; + new_ch->state = RIO_CM_CONNECTED; + spin_lock_init(&new_ch->lock); + + /* Acknowledge the connection request. */ + riocm_send_ack(new_ch); + + *new_ch_id = new_ch->id; + return new_ch; +err_put: + riocm_put_channel(ch); +err_nodev: + if (new_ch) { + spin_lock_bh(&idr_lock); + idr_remove(&ch_idr, new_ch->id); + spin_unlock_bh(&idr_lock); + riocm_put_channel(new_ch); + } + *new_ch_id = 0; + return ERR_PTR(err); +} + +/* + * riocm_ch_listen - puts a channel into LISTEN state + * @ch_id: channel ID + * + * Returns: 0 if success, or + * -EINVAL if the specified channel does not exists or + * is not in CHAN_BOUND state. + */ +static int riocm_ch_listen(u16 ch_id) +{ + struct rio_channel *ch = NULL; + int ret = 0; + + riocm_debug(CHOP, "(ch_%d)", ch_id); + + ch = riocm_get_channel(ch_id); + if (!ch || !riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) + ret = -EINVAL; + riocm_put_channel(ch); + return ret; +} + +/* + * riocm_ch_bind - associate a channel object and an mport device + * @ch_id: channel ID + * @mport_id: local mport device ID + * @context: pointer to the additional caller's context + * + * Returns: 0 if success, or + * -ENODEV if cannot find specified mport, + * -EINVAL if the specified channel does not exist or + * is not in IDLE state. + */ +static int riocm_ch_bind(u16 ch_id, u8 mport_id, void *context) +{ + struct rio_channel *ch = NULL; + struct cm_dev *cm; + int rc = -ENODEV; + + riocm_debug(CHOP, "ch_%d to mport_%d", ch_id, mport_id); + + /* Find matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if ((cm->mport->id == mport_id) && + rio_mport_is_running(cm->mport)) { + rc = 0; + break; + } + } + + if (rc) + goto exit; + + ch = riocm_get_channel(ch_id); + if (!ch) { + rc = -EINVAL; + goto exit; + } + + spin_lock_bh(&ch->lock); + if (ch->state != RIO_CM_IDLE) { + spin_unlock_bh(&ch->lock); + rc = -EINVAL; + goto err_put; + } + + ch->cmdev = cm; + ch->loc_destid = cm->mport->host_deviceid; + ch->context = context; + ch->state = RIO_CM_CHAN_BOUND; + spin_unlock_bh(&ch->lock); +err_put: + riocm_put_channel(ch); +exit: + up_read(&rdev_sem); + return rc; +} + +/* + * riocm_ch_alloc - channel object allocation helper routine + * @ch_num: channel ID (1 ... RIOCM_MAX_CHNUM, 0 = automatic) + * + * Return value: pointer to newly created channel object, + * or error-valued pointer + */ +static struct rio_channel *riocm_ch_alloc(u16 ch_num) +{ + int id; + int start, end; + struct rio_channel *ch; + + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) + return ERR_PTR(-ENOMEM); + + if (ch_num) { + /* If requested, try to obtain the specified channel ID */ + start = ch_num; + end = ch_num + 1; + } else { + /* Obtain channel ID from the dynamic allocation range */ + start = chstart; + end = RIOCM_MAX_CHNUM + 1; + } + + idr_preload(GFP_KERNEL); + spin_lock_bh(&idr_lock); + id = idr_alloc_cyclic(&ch_idr, ch, start, end, GFP_NOWAIT); + spin_unlock_bh(&idr_lock); + idr_preload_end(); + + if (id < 0) { + kfree(ch); + return ERR_PTR(id == -ENOSPC ? -EBUSY : id); + } + + ch->id = (u16)id; + ch->state = RIO_CM_IDLE; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->accept_queue); + INIT_LIST_HEAD(&ch->ch_node); + init_completion(&ch->comp); + init_completion(&ch->comp_close); + kref_init(&ch->ref); + ch->rx_ring.head = 0; + ch->rx_ring.tail = 0; + ch->rx_ring.count = 0; + ch->rx_ring.inuse_cnt = 0; + + return ch; +} + +/* + * riocm_ch_create - creates a new channel object and allocates ID for it + * @ch_num: channel ID (1 ... RIOCM_MAX_CHNUM, 0 = automatic) + * + * Allocates and initializes a new channel object. If the parameter ch_num > 0 + * and is within the valid range, riocm_ch_create tries to allocate the + * specified ID for the new channel. If ch_num = 0, channel ID will be assigned + * automatically from the range (chstart ... RIOCM_MAX_CHNUM). + * Module parameter 'chstart' defines start of an ID range available for dynamic + * allocation. Range below 'chstart' is reserved for pre-defined ID numbers. + * Available channel numbers are limited by 16-bit size of channel numbers used + * in the packet header. + * + * Return value: PTR to rio_channel structure if successful (with channel number + * updated via pointer) or error-valued pointer if error. + */ +static struct rio_channel *riocm_ch_create(u16 *ch_num) +{ + struct rio_channel *ch = NULL; + + ch = riocm_ch_alloc(*ch_num); + + if (IS_ERR(ch)) + riocm_debug(CHOP, "Failed to allocate channel %d (err=%ld)", + *ch_num, PTR_ERR(ch)); + else + *ch_num = ch->id; + + return ch; +} + +/* + * riocm_ch_free - channel object release routine + * @ref: pointer to a channel's kref structure + */ +static void riocm_ch_free(struct kref *ref) +{ + struct rio_channel *ch = container_of(ref, struct rio_channel, ref); + int i; + + riocm_debug(CHOP, "(ch_%d)", ch->id); + + if (ch->rx_ring.inuse_cnt) { + for (i = 0; + i < RIOCM_RX_RING_SIZE && ch->rx_ring.inuse_cnt; i++) { + if (ch->rx_ring.inuse[i] != NULL) { + kfree(ch->rx_ring.inuse[i]); + ch->rx_ring.inuse_cnt--; + } + } + } + + if (ch->rx_ring.count) + for (i = 0; i < RIOCM_RX_RING_SIZE && ch->rx_ring.count; i++) { + if (ch->rx_ring.buf[i] != NULL) { + kfree(ch->rx_ring.buf[i]); + ch->rx_ring.count--; + } + } + + complete(&ch->comp_close); +} + +static int riocm_send_close(struct rio_channel *ch) +{ + struct rio_ch_chan_hdr *hdr; + int ret; + + /* + * Send CH_CLOSE notification to the remote RapidIO device + */ + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) + return -ENOMEM; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_CLOSE; + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + + /* ATTN: the function call below relies on the fact that underlying + * add_outb_message() routine copies TX data into its internal transfer + * buffer. Needs to be reviewed if switched to direct buffer mode. + */ + ret = riocm_post_send(ch->cmdev, ch->rdev, hdr, sizeof(*hdr)); + + if (ret == -EBUSY && !riocm_queue_req(ch->cmdev, ch->rdev, + hdr, sizeof(*hdr))) + return 0; + kfree(hdr); + + if (ret) + riocm_error("ch(%d) send CLOSE failed (ret=%d)", ch->id, ret); + + return ret; +} + +/* + * riocm_ch_close - closes a channel object with specified ID (by local request) + * @ch: channel to be closed + */ +static int riocm_ch_close(struct rio_channel *ch) +{ + unsigned long tmo = msecs_to_jiffies(3000); + enum rio_cm_state state; + long wret; + int ret = 0; + + riocm_debug(CHOP, "ch_%d by %s(%d)", + ch->id, current->comm, task_pid_nr(current)); + + state = riocm_exch(ch, RIO_CM_DESTROYING); + if (state == RIO_CM_CONNECTED) + riocm_send_close(ch); + + complete_all(&ch->comp); + + riocm_put_channel(ch); + wret = wait_for_completion_interruptible_timeout(&ch->comp_close, tmo); + + riocm_debug(WAIT, "wait on %d returns %ld", ch->id, wret); + + if (wret == 0) { + /* Timeout on wait occurred */ + riocm_debug(CHOP, "%s(%d) timed out waiting for ch %d", + current->comm, task_pid_nr(current), ch->id); + ret = -ETIMEDOUT; + } else if (wret == -ERESTARTSYS) { + /* Wait_for_completion was interrupted by a signal */ + riocm_debug(CHOP, "%s(%d) wait for ch %d was interrupted", + current->comm, task_pid_nr(current), ch->id); + ret = -EINTR; + } + + if (!ret) { + riocm_debug(CHOP, "ch_%d resources released", ch->id); + kfree(ch); + } else { + riocm_debug(CHOP, "failed to release ch_%d resources", ch->id); + } + + return ret; +} + +/* + * riocm_cdev_open() - Open character device + */ +static int riocm_cdev_open(struct inode *inode, struct file *filp) +{ + riocm_debug(INIT, "by %s(%d) filp=%p ", + current->comm, task_pid_nr(current), filp); + + if (list_empty(&cm_dev_list)) + return -ENODEV; + + return 0; +} + +/* + * riocm_cdev_release() - Release character device + */ +static int riocm_cdev_release(struct inode *inode, struct file *filp) +{ + struct rio_channel *ch, *_c; + unsigned int i; + LIST_HEAD(list); + + riocm_debug(EXIT, "by %s(%d) filp=%p", + current->comm, task_pid_nr(current), filp); + + /* Check if there are channels associated with this file descriptor */ + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch && ch->filp == filp) { + riocm_debug(EXIT, "ch_%d not released by %s(%d)", + ch->id, current->comm, + task_pid_nr(current)); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } + + return 0; +} + +/* + * cm_ep_get_list_size() - Reports number of endpoints in the network + */ +static int cm_ep_get_list_size(void __user *arg) +{ + u32 __user *p = arg; + u32 mport_id; + u32 count = 0; + struct cm_dev *cm; + + if (get_user(mport_id, p)) + return -EFAULT; + if (mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + /* Find a matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport->id == mport_id) { + count = cm->npeers; + up_read(&rdev_sem); + if (copy_to_user(arg, &count, sizeof(u32))) + return -EFAULT; + return 0; + } + } + up_read(&rdev_sem); + + return -ENODEV; +} + +/* + * cm_ep_get_list() - Returns list of attached endpoints + */ +static int cm_ep_get_list(void __user *arg) +{ + struct cm_dev *cm; + struct cm_peer *peer; + u32 info[2]; + void *buf; + u32 nent; + u32 *entry_ptr; + u32 i = 0; + int ret = 0; + + if (copy_from_user(&info, arg, sizeof(info))) + return -EFAULT; + + if (info[1] >= RIO_MAX_MPORTS || info[0] > RIOCM_MAX_EP_COUNT) + return -EINVAL; + + /* Find a matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) + if (cm->mport->id == (u8)info[1]) + goto found; + + up_read(&rdev_sem); + return -ENODEV; + +found: + nent = min(info[0], cm->npeers); + buf = kcalloc(nent + 2, sizeof(u32), GFP_KERNEL); + if (!buf) { + up_read(&rdev_sem); + return -ENOMEM; + } + + entry_ptr = (u32 *)((uintptr_t)buf + 2*sizeof(u32)); + + list_for_each_entry(peer, &cm->peers, node) { + *entry_ptr = (u32)peer->rdev->destid; + entry_ptr++; + if (++i == nent) + break; + } + up_read(&rdev_sem); + + ((u32 *)buf)[0] = i; /* report an updated number of entries */ + ((u32 *)buf)[1] = info[1]; /* put back an mport ID */ + if (copy_to_user(arg, buf, sizeof(u32) * (info[0] + 2))) + ret = -EFAULT; + + kfree(buf); + return ret; +} + +/* + * cm_mport_get_list() - Returns list of available local mport devices + */ +static int cm_mport_get_list(void __user *arg) +{ + int ret = 0; + u32 entries; + void *buf; + struct cm_dev *cm; + u32 *entry_ptr; + int count = 0; + + if (copy_from_user(&entries, arg, sizeof(entries))) + return -EFAULT; + if (entries == 0 || entries > RIO_MAX_MPORTS) + return -EINVAL; + buf = kcalloc(entries + 1, sizeof(u32), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Scan all registered cm_dev objects */ + entry_ptr = (u32 *)((uintptr_t)buf + sizeof(u32)); + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (count++ < entries) { + *entry_ptr = (cm->mport->id << 16) | + cm->mport->host_deviceid; + entry_ptr++; + } + } + up_read(&rdev_sem); + + *((u32 *)buf) = count; /* report a real number of entries */ + if (copy_to_user(arg, buf, sizeof(u32) * (count + 1))) + ret = -EFAULT; + + kfree(buf); + return ret; +} + +/* + * cm_chan_create() - Create a message exchange channel + */ +static int cm_chan_create(struct file *filp, void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + struct rio_channel *ch; + + if (get_user(ch_num, p)) + return -EFAULT; + + riocm_debug(CHOP, "ch_%d requested by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + ch = riocm_ch_create(&ch_num); + if (IS_ERR(ch)) + return PTR_ERR(ch); + + ch->filp = filp; + riocm_debug(CHOP, "ch_%d created by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + return put_user(ch_num, p); +} + +/* + * cm_chan_close() - Close channel + * @filp: Pointer to file object + * @arg: Channel to close + */ +static int cm_chan_close(struct file *filp, void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + struct rio_channel *ch; + + if (get_user(ch_num, p)) + return -EFAULT; + + riocm_debug(CHOP, "ch_%d by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, ch_num); + if (!ch) { + spin_unlock_bh(&idr_lock); + return 0; + } + if (ch->filp != filp) { + spin_unlock_bh(&idr_lock); + return -EINVAL; + } + idr_remove(&ch_idr, ch->id); + spin_unlock_bh(&idr_lock); + + return riocm_ch_close(ch); +} + +/* + * cm_chan_bind() - Bind channel + * @arg: Channel number + */ +static int cm_chan_bind(void __user *arg) +{ + struct rio_cm_channel chan; + + if (copy_from_user(&chan, arg, sizeof(chan))) + return -EFAULT; + if (chan.mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + return riocm_ch_bind(chan.id, chan.mport_id, NULL); +} + +/* + * cm_chan_listen() - Listen on channel + * @arg: Channel number + */ +static int cm_chan_listen(void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + + if (get_user(ch_num, p)) + return -EFAULT; + + return riocm_ch_listen(ch_num); +} + +/* + * cm_chan_accept() - Accept incoming connection + * @filp: Pointer to file object + * @arg: Channel number + */ +static int cm_chan_accept(struct file *filp, void __user *arg) +{ + struct rio_cm_accept param; + long accept_to; + struct rio_channel *ch; + + if (copy_from_user(¶m, arg, sizeof(param))) + return -EFAULT; + + riocm_debug(CHOP, "on ch_%d by %s(%d)", + param.ch_num, current->comm, task_pid_nr(current)); + + accept_to = param.wait_to ? + msecs_to_jiffies(param.wait_to) : 0; + + ch = riocm_ch_accept(param.ch_num, ¶m.ch_num, accept_to); + if (IS_ERR(ch)) + return PTR_ERR(ch); + ch->filp = filp; + + riocm_debug(CHOP, "new ch_%d for %s(%d)", + ch->id, current->comm, task_pid_nr(current)); + + if (copy_to_user(arg, ¶m, sizeof(param))) + return -EFAULT; + return 0; +} + +/* + * cm_chan_connect() - Connect on channel + * @arg: Channel information + */ +static int cm_chan_connect(void __user *arg) +{ + struct rio_cm_channel chan; + struct cm_dev *cm; + struct cm_peer *peer; + int ret = -ENODEV; + + if (copy_from_user(&chan, arg, sizeof(chan))) + return -EFAULT; + if (chan.mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + down_read(&rdev_sem); + + /* Find matching cm_dev object */ + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport->id == chan.mport_id) { + ret = 0; + break; + } + } + + if (ret) + goto err_out; + + if (chan.remote_destid >= RIO_ANY_DESTID(cm->mport->sys_size)) { + ret = -EINVAL; + goto err_out; + } + + /* Find corresponding RapidIO endpoint device object */ + ret = -ENODEV; + + list_for_each_entry(peer, &cm->peers, node) { + if (peer->rdev->destid == chan.remote_destid) { + ret = 0; + break; + } + } + + if (ret) + goto err_out; + + up_read(&rdev_sem); + + return riocm_ch_connect(chan.id, cm, peer, chan.remote_channel); +err_out: + up_read(&rdev_sem); + return ret; +} + +/* + * cm_chan_msg_send() - Send a message through channel + * @arg: Outbound message information + */ +static int cm_chan_msg_send(void __user *arg) +{ + struct rio_cm_msg msg; + void *buf; + int ret = 0; + + if (copy_from_user(&msg, arg, sizeof(msg))) + return -EFAULT; + if (msg.size > RIO_MAX_MSG_SIZE) + return -EINVAL; + + buf = kmalloc(msg.size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, (void __user *)(uintptr_t)msg.msg, msg.size)) { + ret = -EFAULT; + goto out; + } + + ret = riocm_ch_send(msg.ch_num, buf, msg.size); +out: + kfree(buf); + return ret; +} + +/* + * cm_chan_msg_rcv() - Receive a message through channel + * @arg: Inbound message information + */ +static int cm_chan_msg_rcv(void __user *arg) +{ + struct rio_cm_msg msg; + struct rio_channel *ch; + void *buf; + long rxto; + int ret = 0, msg_size; + + if (copy_from_user(&msg, arg, sizeof(msg))) + return -EFAULT; + + if (msg.ch_num == 0 || msg.size == 0) + return -EINVAL; + + ch = riocm_get_channel(msg.ch_num); + if (!ch) + return -ENODEV; + + rxto = msg.rxto ? msecs_to_jiffies(msg.rxto) : MAX_SCHEDULE_TIMEOUT; + + ret = riocm_ch_receive(ch, &buf, rxto); + if (ret) + goto out; + + msg_size = min(msg.size, (u16)(RIO_MAX_MSG_SIZE)); + + if (copy_to_user((void __user *)(uintptr_t)msg.msg, buf, msg_size)) + ret = -EFAULT; + + riocm_ch_free_rxbuf(ch, buf); +out: + riocm_put_channel(ch); + return ret; +} + +/* + * riocm_cdev_ioctl() - IOCTL requests handler + */ +static long +riocm_cdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case RIO_CM_EP_GET_LIST_SIZE: + return cm_ep_get_list_size((void __user *)arg); + case RIO_CM_EP_GET_LIST: + return cm_ep_get_list((void __user *)arg); + case RIO_CM_CHAN_CREATE: + return cm_chan_create(filp, (void __user *)arg); + case RIO_CM_CHAN_CLOSE: + return cm_chan_close(filp, (void __user *)arg); + case RIO_CM_CHAN_BIND: + return cm_chan_bind((void __user *)arg); + case RIO_CM_CHAN_LISTEN: + return cm_chan_listen((void __user *)arg); + case RIO_CM_CHAN_ACCEPT: + return cm_chan_accept(filp, (void __user *)arg); + case RIO_CM_CHAN_CONNECT: + return cm_chan_connect((void __user *)arg); + case RIO_CM_CHAN_SEND: + return cm_chan_msg_send((void __user *)arg); + case RIO_CM_CHAN_RECEIVE: + return cm_chan_msg_rcv((void __user *)arg); + case RIO_CM_MPORT_GET_LIST: + return cm_mport_get_list((void __user *)arg); + default: + break; + } + + return -EINVAL; +} + +static const struct file_operations riocm_cdev_fops = { + .owner = THIS_MODULE, + .open = riocm_cdev_open, + .release = riocm_cdev_release, + .unlocked_ioctl = riocm_cdev_ioctl, +}; + +/* + * riocm_add_dev - add new remote RapidIO device into channel management core + * @dev: device object associated with RapidIO device + * @sif: subsystem interface + * + * Adds the specified RapidIO device (if applicable) into peers list of + * the corresponding channel management device (cm_dev). + */ +static int riocm_add_dev(struct device *dev, struct subsys_interface *sif) +{ + struct cm_peer *peer; + struct rio_dev *rdev = to_rio_dev(dev); + struct cm_dev *cm; + + /* Check if the remote device has capabilities required to support CM */ + if (!dev_cm_capable(rdev)) + return 0; + + riocm_debug(RDEV, "(%s)", rio_name(rdev)); + + peer = kmalloc(sizeof(*peer), GFP_KERNEL); + if (!peer) + return -ENOMEM; + + /* Find a corresponding cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == rdev->net->hport) + goto found; + } + + up_write(&rdev_sem); + kfree(peer); + return -ENODEV; + +found: + peer->rdev = rdev; + list_add_tail(&peer->node, &cm->peers); + cm->npeers++; + + up_write(&rdev_sem); + return 0; +} + +/* + * riocm_remove_dev - remove remote RapidIO device from channel management core + * @dev: device object associated with RapidIO device + * @sif: subsystem interface + * + * Removes the specified RapidIO device (if applicable) from peers list of + * the corresponding channel management device (cm_dev). + */ +static void riocm_remove_dev(struct device *dev, struct subsys_interface *sif) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct cm_dev *cm; + struct cm_peer *peer; + struct rio_channel *ch, *_c; + unsigned int i; + bool found = false; + LIST_HEAD(list); + + /* Check if the remote device has capabilities required to support CM */ + if (!dev_cm_capable(rdev)) + return; + + riocm_debug(RDEV, "(%s)", rio_name(rdev)); + + /* Find matching cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == rdev->net->hport) { + found = true; + break; + } + } + + if (!found) { + up_write(&rdev_sem); + return; + } + + /* Remove remote device from the list of peers */ + found = false; + list_for_each_entry(peer, &cm->peers, node) { + if (peer->rdev == rdev) { + riocm_debug(RDEV, "removing peer %s", rio_name(rdev)); + found = true; + list_del(&peer->node); + cm->npeers--; + kfree(peer); + break; + } + } + + up_write(&rdev_sem); + + if (!found) + return; + + /* + * Release channels associated with this peer + */ + + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch && ch->rdev == rdev) { + if (atomic_read(&rdev->state) != RIO_DEVICE_SHUTDOWN) + riocm_exch(ch, RIO_CM_DISCONNECT); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } +} + +/* + * riocm_cdev_add() - Create rio_cm char device + * @devno: device number assigned to device (MAJ + MIN) + */ +static int riocm_cdev_add(dev_t devno) +{ + int ret; + + cdev_init(&riocm_cdev.cdev, &riocm_cdev_fops); + riocm_cdev.cdev.owner = THIS_MODULE; + ret = cdev_add(&riocm_cdev.cdev, devno, 1); + if (ret < 0) { + riocm_error("Cannot register a device with error %d", ret); + return ret; + } + + riocm_cdev.dev = device_create(dev_class, NULL, devno, NULL, DEV_NAME); + if (IS_ERR(riocm_cdev.dev)) { + cdev_del(&riocm_cdev.cdev); + return PTR_ERR(riocm_cdev.dev); + } + + riocm_debug(MPORT, "Added %s cdev(%d:%d)", + DEV_NAME, MAJOR(devno), MINOR(devno)); + + return 0; +} + +/* + * riocm_add_mport - add new local mport device into channel management core + * @dev: device object associated with mport + * @class_intf: class interface + * + * When a new mport device is added, CM immediately reserves inbound and + * outbound RapidIO mailboxes that will be used. + */ +static int riocm_add_mport(struct device *dev, + struct class_interface *class_intf) +{ + int rc; + int i; + struct cm_dev *cm; + struct rio_mport *mport = to_rio_mport(dev); + + riocm_debug(MPORT, "add mport %s", mport->name); + + cm = kzalloc(sizeof(*cm), GFP_KERNEL); + if (!cm) + return -ENOMEM; + + cm->mport = mport; + + rc = rio_request_outb_mbox(mport, cm, cmbox, + RIOCM_TX_RING_SIZE, riocm_outb_msg_event); + if (rc) { + riocm_error("failed to allocate OBMBOX_%d on %s", + cmbox, mport->name); + kfree(cm); + return -ENODEV; + } + + rc = rio_request_inb_mbox(mport, cm, cmbox, + RIOCM_RX_RING_SIZE, riocm_inb_msg_event); + if (rc) { + riocm_error("failed to allocate IBMBOX_%d on %s", + cmbox, mport->name); + rio_release_outb_mbox(mport, cmbox); + kfree(cm); + return -ENODEV; + } + + /* + * Allocate and register inbound messaging buffers to be ready + * to receive channel and system management requests + */ + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) + cm->rx_buf[i] = NULL; + + cm->rx_slots = RIOCM_RX_RING_SIZE; + mutex_init(&cm->rx_lock); + riocm_rx_fill(cm, RIOCM_RX_RING_SIZE); + cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); + INIT_WORK(&cm->rx_work, rio_ibmsg_handler); + + cm->tx_slot = 0; + cm->tx_cnt = 0; + cm->tx_ack_slot = 0; + spin_lock_init(&cm->tx_lock); + + INIT_LIST_HEAD(&cm->peers); + cm->npeers = 0; + INIT_LIST_HEAD(&cm->tx_reqs); + + down_write(&rdev_sem); + list_add_tail(&cm->list, &cm_dev_list); + up_write(&rdev_sem); + + return 0; +} + +/* + * riocm_remove_mport - remove local mport device from channel management core + * @dev: device object associated with mport + * @class_intf: class interface + * + * Removes a local mport device from the list of registered devices that provide + * channel management services. Returns an error if the specified mport is not + * registered with the CM core. + */ +static void riocm_remove_mport(struct device *dev, + struct class_interface *class_intf) +{ + struct rio_mport *mport = to_rio_mport(dev); + struct cm_dev *cm; + struct cm_peer *peer, *temp; + struct rio_channel *ch, *_c; + unsigned int i; + bool found = false; + LIST_HEAD(list); + + riocm_debug(MPORT, "%s", mport->name); + + /* Find a matching cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == mport) { + list_del(&cm->list); + found = true; + break; + } + } + up_write(&rdev_sem); + if (!found) + return; + + flush_workqueue(cm->rx_wq); + destroy_workqueue(cm->rx_wq); + + /* Release channels bound to this mport */ + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch->cmdev == cm) { + riocm_debug(RDEV, "%s drop ch_%d", + mport->name, ch->id); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } + + rio_release_inb_mbox(mport, cmbox); + rio_release_outb_mbox(mport, cmbox); + + /* Remove and free peer entries */ + if (!list_empty(&cm->peers)) + riocm_debug(RDEV, "ATTN: peer list not empty"); + list_for_each_entry_safe(peer, temp, &cm->peers, node) { + riocm_debug(RDEV, "removing peer %s", rio_name(peer->rdev)); + list_del(&peer->node); + kfree(peer); + } + + riocm_rx_free(cm); + kfree(cm); + riocm_debug(MPORT, "%s done", mport->name); +} + +static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code, + void *unused) +{ + struct rio_channel *ch; + unsigned int i; + + riocm_debug(EXIT, "."); + + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + riocm_debug(EXIT, "close ch %d", ch->id); + if (ch->state == RIO_CM_CONNECTED) + riocm_send_close(ch); + } + spin_unlock_bh(&idr_lock); + + return NOTIFY_DONE; +} + +/* + * riocm_interface handles addition/removal of remote RapidIO devices + */ +static struct subsys_interface riocm_interface = { + .name = "rio_cm", + .subsys = &rio_bus_type, + .add_dev = riocm_add_dev, + .remove_dev = riocm_remove_dev, +}; + +/* + * rio_mport_interface handles addition/removal local mport devices + */ +static struct class_interface rio_mport_interface __refdata = { + .class = &rio_mport_class, + .add_dev = riocm_add_mport, + .remove_dev = riocm_remove_mport, +}; + +static struct notifier_block rio_cm_notifier = { + .notifier_call = rio_cm_shutdown, +}; + +static int __init riocm_init(void) +{ + int ret; + + /* Create device class needed by udev */ + dev_class = class_create(THIS_MODULE, DRV_NAME); + if (IS_ERR(dev_class)) { + riocm_error("Cannot create " DRV_NAME " class"); + return PTR_ERR(dev_class); + } + + ret = alloc_chrdev_region(&dev_number, 0, 1, DRV_NAME); + if (ret) { + class_destroy(dev_class); + return ret; + } + + dev_major = MAJOR(dev_number); + dev_minor_base = MINOR(dev_number); + riocm_debug(INIT, "Registered class with %d major", dev_major); + + /* + * Register as rapidio_port class interface to get notifications about + * mport additions and removals. + */ + ret = class_interface_register(&rio_mport_interface); + if (ret) { + riocm_error("class_interface_register error: %d", ret); + goto err_reg; + } + + /* + * Register as RapidIO bus interface to get notifications about + * addition/removal of remote RapidIO devices. + */ + ret = subsys_interface_register(&riocm_interface); + if (ret) { + riocm_error("subsys_interface_register error: %d", ret); + goto err_cl; + } + + ret = register_reboot_notifier(&rio_cm_notifier); + if (ret) { + riocm_error("failed to register reboot notifier (err=%d)", ret); + goto err_sif; + } + + ret = riocm_cdev_add(dev_number); + if (ret) { + unregister_reboot_notifier(&rio_cm_notifier); + ret = -ENODEV; + goto err_sif; + } + + return 0; +err_sif: + subsys_interface_unregister(&riocm_interface); +err_cl: + class_interface_unregister(&rio_mport_interface); +err_reg: + unregister_chrdev_region(dev_number, 1); + class_destroy(dev_class); + return ret; +} + +static void __exit riocm_exit(void) +{ + riocm_debug(EXIT, "enter"); + unregister_reboot_notifier(&rio_cm_notifier); + subsys_interface_unregister(&riocm_interface); + class_interface_unregister(&rio_mport_interface); + idr_destroy(&ch_idr); + + device_unregister(riocm_cdev.dev); + cdev_del(&(riocm_cdev.cdev)); + + class_destroy(dev_class); + unregister_chrdev_region(dev_number, 1); +} + +late_initcall(riocm_init); +module_exit(riocm_exit); diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 345841562f95..92767fd3b541 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -22,3 +22,9 @@ config RAPIDIO_CPS_GEN2 default n ---help--- Includes support for ITD CPS Gen.2 serial RapidIO switches. + +config RAPIDIO_RXS_GEN3 + tristate "IDT RXS Gen.3 SRIO switch support" + default n + ---help--- + Includes support for ITD RXS Gen.3 serial RapidIO switches. diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index 051cc6b38188..6bdd54c4e733 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o +obj-$(CONFIG_RAPIDIO_RXS_GEN3) += idt_gen3.o diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 9f7fe21580bb..e67b923b1ca6 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -436,10 +436,11 @@ static int idtg2_probe(struct rio_dev *rdev, const struct rio_device_id *id) RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE); } + spin_unlock(&rdev->rswitch->lock); + /* Create device-specific sysfs attributes */ idtg2_sysfs(rdev, true); - spin_unlock(&rdev->rswitch->lock); return 0; } @@ -452,11 +453,9 @@ static void idtg2_remove(struct rio_dev *rdev) return; } rdev->rswitch->ops = NULL; - + spin_unlock(&rdev->rswitch->lock); /* Remove device-specific sysfs attributes */ idtg2_sysfs(rdev, false); - - spin_unlock(&rdev->rswitch->lock); } static struct rio_device_id idtg2_id_table[] = { diff --git a/drivers/rapidio/switches/idt_gen3.c b/drivers/rapidio/switches/idt_gen3.c new file mode 100644 index 000000000000..c5923a547bed --- /dev/null +++ b/drivers/rapidio/switches/idt_gen3.c @@ -0,0 +1,382 @@ +/* + * IDT RXS Gen.3 Serial RapidIO switch family support + * + * Copyright 2016 Integrated Device Technology, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stat.h> +#include <linux/module.h> +#include <linux/rio.h> +#include <linux/rio_drv.h> +#include <linux/rio_ids.h> +#include <linux/delay.h> + +#include <asm/page.h> +#include "../rio.h" + +#define RIO_EM_PW_STAT 0x40020 +#define RIO_PW_CTL 0x40204 +#define RIO_PW_CTL_PW_TMR 0xffffff00 +#define RIO_PW_ROUTE 0x40208 + +#define RIO_EM_DEV_INT_EN 0x40030 + +#define RIO_PLM_SPx_IMP_SPEC_CTL(x) (0x10100 + (x)*0x100) +#define RIO_PLM_SPx_IMP_SPEC_CTL_SOFT_RST 0x02000000 + +#define RIO_PLM_SPx_PW_EN(x) (0x10118 + (x)*0x100) +#define RIO_PLM_SPx_PW_EN_OK2U 0x40000000 +#define RIO_PLM_SPx_PW_EN_LINIT 0x10000000 + +#define RIO_BC_L2_Gn_ENTRYx_CSR(n, x) (0x31000 + (n)*0x400 + (x)*0x4) +#define RIO_SPx_L2_Gn_ENTRYy_CSR(x, n, y) \ + (0x51000 + (x)*0x2000 + (n)*0x400 + (y)*0x4) + +static int +idtg3_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) +{ + u32 rval; + u32 entry = route_port; + int err = 0; + + pr_debug("RIO: %s t=0x%x did_%x to p_%x\n", + __func__, table, route_destid, entry); + + if (route_destid > 0xFF) + return -EINVAL; + + if (route_port == RIO_INVALID_ROUTE) + entry = RIO_RT_ENTRY_DROP_PKT; + + if (table == RIO_GLOBAL_TABLE) { + /* Use broadcast register to update all per-port tables */ + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_BC_L2_Gn_ENTRYx_CSR(0, route_destid), + entry); + return err; + } + + /* + * Verify that specified port/table number is valid + */ + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, route_destid), + entry); + return err; +} + +static int +idtg3_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) +{ + u32 rval; + int err; + + if (route_destid > 0xFF) + return -EINVAL; + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + /* + * This switch device does not have the dedicated global routing table. + * It is substituted by reading routing table of the ingress port of + * maintenance read requests. + */ + if (table == RIO_GLOBAL_TABLE) + table = RIO_GET_PORT_NUM(rval); + else if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, route_destid), + &rval); + if (err) + return err; + + if (rval == RIO_RT_ENTRY_DROP_PKT) + *route_port = RIO_INVALID_ROUTE; + else + *route_port = (u8)rval; + + return 0; +} + +static int +idtg3_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) +{ + u32 i; + u32 rval; + int err; + + if (table == RIO_GLOBAL_TABLE) { + for (i = 0; i <= 0xff; i++) { + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_BC_L2_Gn_ENTRYx_CSR(0, i), + RIO_RT_ENTRY_DROP_PKT); + if (err) + break; + } + + return err; + } + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + for (i = 0; i <= 0xff; i++) { + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, i), + RIO_RT_ENTRY_DROP_PKT); + if (err) + break; + } + + return err; +} + +/* + * This routine performs device-specific initialization only. + * All standard EM configuration should be performed at upper level. + */ +static int +idtg3_em_init(struct rio_dev *rdev) +{ + int i, tmp; + u32 rval; + + pr_debug("RIO: %s [%d:%d]\n", __func__, rdev->destid, rdev->hopcount); + + /* Disable assertion of interrupt signal */ + rio_write_config_32(rdev, RIO_EM_DEV_INT_EN, 0); + + /* Disable port-write event notifications during initialization */ + rio_write_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TX_CTRL, + RIO_EM_PW_TX_CTRL_PW_DIS); + + /* Configure Port-Write notifications for hot-swap events */ + tmp = RIO_GET_TOTAL_PORTS(rdev->swpinfo); + for (i = 0; i < tmp; i++) { + + rio_read_config_32(rdev, + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, i), + &rval); + if (rval & RIO_PORT_N_ERR_STS_PORT_UA) + continue; + + /* Clear events signaled before enabling notification */ + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(i), 0); + + /* Enable event notifications */ + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PN_ERRRATE_EN(i), + RIO_EM_PN_ERRRATE_EN_OK2U | RIO_EM_PN_ERRRATE_EN_U2OK); + /* Enable port-write generation on events */ + rio_write_config_32(rdev, RIO_PLM_SPx_PW_EN(i), + RIO_PLM_SPx_PW_EN_OK2U | RIO_PLM_SPx_PW_EN_LINIT); + + } + + /* Set Port-Write destination port */ + tmp = RIO_GET_PORT_NUM(rdev->swpinfo); + rio_write_config_32(rdev, RIO_PW_ROUTE, 1 << tmp); + + + /* Enable sending port-write event notifications */ + rio_write_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TX_CTRL, 0); + + /* set TVAL = ~50us */ + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + return 0; +} + + +/* + * idtg3_em_handler - device-specific error handler + * + * If the link is down (PORT_UNINIT) does nothing - this is considered + * as link partner removal from the port. + * + * If the link is up (PORT_OK) - situation is handled as *new* device insertion. + * In this case ERR_STOP bits are cleared by issuing soft reset command to the + * reporting port. Inbound and outbound ackIDs are cleared by the reset as well. + * This way the port is synchronized with freshly inserted device (assuming it + * was reset/powered-up on insertion). + * + * TODO: This is not sufficient in a situation when a link between two devices + * was down and up again (e.g. cable disconnect). For that situation full ackID + * realignment process has to be implemented. + */ +static int +idtg3_em_handler(struct rio_dev *rdev, u8 pnum) +{ + u32 err_status; + u32 rval; + + rio_read_config_32(rdev, + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); + + /* Do nothing for device/link removal */ + if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) + return 0; + + /* When link is OK we have a device insertion. + * Request port soft reset to clear errors if they present. + * Inbound and outbound ackIDs will be 0 after reset. + */ + if (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) { + rio_read_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), &rval); + rio_write_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), + rval | RIO_PLM_SPx_IMP_SPEC_CTL_SOFT_RST); + udelay(10); + rio_write_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), rval); + msleep(500); + } + + return 0; +} + +static struct rio_switch_ops idtg3_switch_ops = { + .owner = THIS_MODULE, + .add_entry = idtg3_route_add_entry, + .get_entry = idtg3_route_get_entry, + .clr_table = idtg3_route_clr_table, + .em_init = idtg3_em_init, + .em_handle = idtg3_em_handler, +}; + +static int idtg3_probe(struct rio_dev *rdev, const struct rio_device_id *id) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + + rdev->rswitch->ops = &idtg3_switch_ops; + + if (rdev->do_enum) { + /* Disable hierarchical routing support: Existing fabric + * enumeration/discovery process (see rio-scan.c) uses 8-bit + * flat destination ID routing only. + */ + rio_write_config_32(rdev, 0x5000 + RIO_BC_RT_CTL_CSR, 0); + } + + spin_unlock(&rdev->rswitch->lock); + + return 0; +} + +static void idtg3_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops == &idtg3_switch_ops) + rdev->rswitch->ops = NULL; + spin_unlock(&rdev->rswitch->lock); +} + +/* + * Gen3 switches repeat sending PW messages until a corresponding event flag + * is cleared. Use shutdown notification to disable generation of port-write + * messages if their destination node is shut down. + */ +static void idtg3_shutdown(struct rio_dev *rdev) +{ + int i; + u32 rval; + u16 destid; + + /* Currently the enumerator node acts also as PW handler */ + if (!rdev->do_enum) + return; + + pr_debug("RIO: %s(%s)\n", __func__, rio_name(rdev)); + + rio_read_config_32(rdev, RIO_PW_ROUTE, &rval); + i = RIO_GET_PORT_NUM(rdev->swpinfo); + + /* Check port-write destination port */ + if (!((1 << i) & rval)) + return; + + /* Disable sending port-write event notifications if PW destID + * matches to one of the enumerator node + */ + rio_read_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TGT_DEVID, &rval); + + if (rval & RIO_EM_PW_TGT_DEVID_DEV16) + destid = rval >> 16; + else + destid = ((rval & RIO_EM_PW_TGT_DEVID_D8) >> 16); + + if (rdev->net->hport->host_deviceid == destid) { + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PW_TX_CTRL, 0); + pr_debug("RIO: %s(%s) PW transmission disabled\n", + __func__, rio_name(rdev)); + } +} + +static struct rio_device_id idtg3_id_table[] = { + {RIO_DEVICE(RIO_DID_IDTRXS1632, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTRXS2448, RIO_VID_IDT)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver idtg3_driver = { + .name = "idt_gen3", + .id_table = idtg3_id_table, + .probe = idtg3_probe, + .remove = idtg3_remove, + .shutdown = idtg3_shutdown, +}; + +static int __init idtg3_init(void) +{ + return rio_register_driver(&idtg3_driver); +} + +static void __exit idtg3_exit(void) +{ + pr_debug("RIO: %s\n", __func__); + rio_unregister_driver(&idtg3_driver); + pr_debug("RIO: %s done\n", __func__); +} + +device_initcall(idtg3_init); +module_exit(idtg3_exit); + +MODULE_DESCRIPTION("IDT RXS Gen.3 Serial RapidIO switch family driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index 42c8b014fe15..2700d15f7584 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -175,12 +175,10 @@ tsi57x_em_init(struct rio_dev *rdev) /* Clear all pending interrupts */ rio_read_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), ®val); rio_write_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), regval & 0x07120214); rio_read_config_32(rdev, @@ -198,7 +196,7 @@ tsi57x_em_init(struct rio_dev *rdev) /* Skip next (odd) port if the current port is in x4 mode */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), ®val); if ((regval & RIO_PORT_N_CTL_PWIDTH) == RIO_PORT_N_CTL_PWIDTH_4) portnum++; @@ -221,23 +219,23 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) u32 regval; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), &err_status); if ((err_status & RIO_PORT_N_ERR_STS_PORT_OK) && - (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES))) { + (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES))) { /* Remove any queued packets by locking/unlocking port */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), ®val); if (!(regval & RIO_PORT_N_CTL_LOCKOUT)) { rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), regval | RIO_PORT_N_CTL_LOCKOUT); udelay(50); rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), regval); } @@ -245,7 +243,7 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) * valid bit */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(portnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, portnum), ®val); /* Send a Packet-Not-Accepted/Link-Request-Input-Status control @@ -259,8 +257,8 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) while (checkcount--) { udelay(50); rio_read_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_MNT_RSP_CSR(portnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, + portnum), ®val); if (regval & RIO_PORT_N_MNT_RSP_RVAL) goto exit_es; diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 666bc3bb52ef..c24524242da2 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -22,6 +22,12 @@ #include <linux/pwm.h> #include <linux/gpio/consumer.h> +struct pwm_continuous_reg_data { + unsigned int min_uV_dutycycle; + unsigned int max_uV_dutycycle; + unsigned int dutycycle_unit; +}; + struct pwm_regulator_data { /* Shared */ struct pwm_device *pwm; @@ -29,6 +35,9 @@ struct pwm_regulator_data { /* Voltage table */ struct pwm_voltages *duty_cycle_table; + /* Continuous mode info */ + struct pwm_continuous_reg_data continuous; + /* regulator descriptor */ struct regulator_desc desc; @@ -37,9 +46,6 @@ struct pwm_regulator_data { int state; - /* Continuous voltage */ - int volt_uV; - /* Enable GPIO */ struct gpio_desc *enb_gpio; }; @@ -52,10 +58,31 @@ struct pwm_voltages { /** * Voltage table call-backs */ +static void pwm_regulator_init_state(struct regulator_dev *rdev) +{ + struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + struct pwm_state pwm_state; + unsigned int dutycycle; + int i; + + pwm_get_state(drvdata->pwm, &pwm_state); + dutycycle = pwm_get_relative_duty_cycle(&pwm_state, 100); + + for (i = 0; i < rdev->desc->n_voltages; i++) { + if (dutycycle == drvdata->duty_cycle_table[i].dutycycle) { + drvdata->state = i; + return; + } + } +} + static int pwm_regulator_get_voltage_sel(struct regulator_dev *rdev) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + if (drvdata->state < 0) + pwm_regulator_init_state(rdev); + return drvdata->state; } @@ -63,16 +90,14 @@ static int pwm_regulator_set_voltage_sel(struct regulator_dev *rdev, unsigned selector) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); - struct pwm_args pargs; - int dutycycle; + struct pwm_state pstate; int ret; - pwm_get_args(drvdata->pwm, &pargs); - - dutycycle = (pargs.period * - drvdata->duty_cycle_table[selector].dutycycle) / 100; + pwm_init_state(drvdata->pwm, &pstate); + pwm_set_relative_duty_cycle(&pstate, + drvdata->duty_cycle_table[selector].dutycycle, 100); - ret = pwm_config(drvdata->pwm, dutycycle, pargs.period); + ret = pwm_apply_state(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; @@ -129,57 +154,90 @@ static int pwm_regulator_is_enabled(struct regulator_dev *dev) static int pwm_regulator_get_voltage(struct regulator_dev *rdev) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + unsigned int min_uV_duty = drvdata->continuous.min_uV_dutycycle; + unsigned int max_uV_duty = drvdata->continuous.max_uV_dutycycle; + unsigned int duty_unit = drvdata->continuous.dutycycle_unit; + int min_uV = rdev->constraints->min_uV; + int max_uV = rdev->constraints->max_uV; + int diff_uV = max_uV - min_uV; + struct pwm_state pstate; + unsigned int diff_duty; + unsigned int voltage; + + pwm_get_state(drvdata->pwm, &pstate); + + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + + /* + * The dutycycle for min_uV might be greater than the one for max_uV. + * This is happening when the user needs an inversed polarity, but the + * PWM device does not support inversing it in hardware. + */ + if (max_uV_duty < min_uV_duty) { + voltage = min_uV_duty - voltage; + diff_duty = min_uV_duty - max_uV_duty; + } else { + voltage = voltage - min_uV_duty; + diff_duty = max_uV_duty - min_uV_duty; + } - return drvdata->volt_uV; + voltage = DIV_ROUND_CLOSEST_ULL((u64)voltage * diff_uV, diff_duty); + + return voltage + min_uV; } static int pwm_regulator_set_voltage(struct regulator_dev *rdev, - int min_uV, int max_uV, - unsigned *selector) + int req_min_uV, int req_max_uV, + unsigned int *selector) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + unsigned int min_uV_duty = drvdata->continuous.min_uV_dutycycle; + unsigned int max_uV_duty = drvdata->continuous.max_uV_dutycycle; + unsigned int duty_unit = drvdata->continuous.dutycycle_unit; unsigned int ramp_delay = rdev->constraints->ramp_delay; - struct pwm_args pargs; - unsigned int req_diff = min_uV - rdev->constraints->min_uV; - unsigned int diff; - unsigned int duty_pulse; - u64 req_period; - u32 rem; + int min_uV = rdev->constraints->min_uV; + int max_uV = rdev->constraints->max_uV; + int diff_uV = max_uV - min_uV; + struct pwm_state pstate; int old_uV = pwm_regulator_get_voltage(rdev); + unsigned int diff_duty; + unsigned int dutycycle; int ret; - pwm_get_args(drvdata->pwm, &pargs); - diff = rdev->constraints->max_uV - rdev->constraints->min_uV; + pwm_init_state(drvdata->pwm, &pstate); - /* First try to find out if we get the iduty cycle time which is - * factor of PWM period time. If (request_diff_to_min * pwm_period) - * is perfect divided by voltage_range_diff then it is possible to - * get duty cycle time which is factor of PWM period. This will help - * to get output voltage nearer to requested value as there is no - * calculation loss. + /* + * The dutycycle for min_uV might be greater than the one for max_uV. + * This is happening when the user needs an inversed polarity, but the + * PWM device does not support inversing it in hardware. */ - req_period = req_diff * pargs.period; - div_u64_rem(req_period, diff, &rem); - if (!rem) { - do_div(req_period, diff); - duty_pulse = (unsigned int)req_period; - } else { - duty_pulse = (pargs.period / 100) * ((req_diff * 100) / diff); - } + if (max_uV_duty < min_uV_duty) + diff_duty = min_uV_duty - max_uV_duty; + else + diff_duty = max_uV_duty - min_uV_duty; + + dutycycle = DIV_ROUND_CLOSEST_ULL((u64)(req_min_uV - min_uV) * + diff_duty, + diff_uV); + + if (max_uV_duty < min_uV_duty) + dutycycle = min_uV_duty - dutycycle; + else + dutycycle = min_uV_duty + dutycycle; + + pwm_set_relative_duty_cycle(&pstate, dutycycle, duty_unit); - ret = pwm_config(drvdata->pwm, duty_pulse, pargs.period); + ret = pwm_apply_state(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; } - drvdata->volt_uV = min_uV; - if ((ramp_delay == 0) || !pwm_regulator_is_enabled(rdev)) return 0; /* Ramp delay is in uV/uS. Adjust to uS and delay */ - ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); + ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); usleep_range(ramp_delay, ramp_delay + DIV_ROUND_UP(ramp_delay, 10)); return 0; @@ -239,6 +297,7 @@ static int pwm_regulator_init_table(struct platform_device *pdev, return ret; } + drvdata->state = -EINVAL; drvdata->duty_cycle_table = duty_cycle_table; memcpy(&drvdata->ops, &pwm_regulator_voltage_table_ops, sizeof(drvdata->ops)); @@ -251,11 +310,28 @@ static int pwm_regulator_init_table(struct platform_device *pdev, static int pwm_regulator_init_continuous(struct platform_device *pdev, struct pwm_regulator_data *drvdata) { + u32 dutycycle_range[2] = { 0, 100 }; + u32 dutycycle_unit = 100; + memcpy(&drvdata->ops, &pwm_regulator_voltage_continuous_ops, sizeof(drvdata->ops)); drvdata->desc.ops = &drvdata->ops; drvdata->desc.continuous_voltage_range = true; + of_property_read_u32_array(pdev->dev.of_node, + "pwm-dutycycle-range", + dutycycle_range, 2); + of_property_read_u32(pdev->dev.of_node, "pwm-dutycycle-unit", + &dutycycle_unit); + + if (dutycycle_range[0] > dutycycle_unit || + dutycycle_range[1] > dutycycle_unit) + return -EINVAL; + + drvdata->continuous.dutycycle_unit = dutycycle_unit; + drvdata->continuous.min_uV_dutycycle = dutycycle_range[0]; + drvdata->continuous.max_uV_dutycycle = dutycycle_range[1]; + return 0; } @@ -316,11 +392,9 @@ static int pwm_regulator_probe(struct platform_device *pdev) return ret; } - /* - * FIXME: pwm_apply_args() should be removed when switching to the - * atomic PWM API. - */ - pwm_apply_args(drvdata->pwm); + ret = pwm_adjust_config(drvdata->pwm); + if (ret) + return ret; regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 24791886219a..2a1b2c7d8f2c 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -349,13 +349,12 @@ static void q6v5proc_halt_axi_port(struct q6v5 *qproc, static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw) { - DEFINE_DMA_ATTRS(attrs); + unsigned long dma_attrs = DMA_ATTR_FORCE_CONTIGUOUS; dma_addr_t phys; void *ptr; int ret; - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &attrs); - ptr = dma_alloc_attrs(qproc->dev, fw->size, &phys, GFP_KERNEL, &attrs); + ptr = dma_alloc_attrs(qproc->dev, fw->size, &phys, GFP_KERNEL, dma_attrs); if (!ptr) { dev_err(qproc->dev, "failed to allocate mdt buffer\n"); return -ENOMEM; @@ -372,7 +371,7 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw) else if (ret < 0) dev_err(qproc->dev, "MPSS header authentication failed: %d\n", ret); - dma_free_attrs(qproc->dev, fw->size, ptr, phys, &attrs); + dma_free_attrs(qproc->dev, fw->size, ptr, phys, dma_attrs); return ret < 0 ? ret : 0; } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 18639e0cb6e2..e215f50794b6 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -5,6 +5,10 @@ config RTC_LIB bool +config RTC_MC146818_LIB + bool + select RTC_LIB + menuconfig RTC_CLASS bool "Real Time Clock" default n @@ -574,10 +578,10 @@ config RTC_DRV_EM3027 will be called rtc-em3027. config RTC_DRV_RV8803 - tristate "Micro Crystal RV8803" + tristate "Micro Crystal RV8803, Epson RX8900" help - If you say yes here you get support for the Micro Crystal - RV8803 RTC chips. + If you say yes here you get support for the Micro Crystal RV8803 and + Epson RX8900 RTC chips. This driver can also be built as a module. If so, the module will be called rtc-rv8803. @@ -670,6 +674,18 @@ config RTC_DRV_DS1390 This driver can also be built as a module. If so, the module will be called rtc-ds1390. +config RTC_DRV_MAX6916 + tristate "Maxim MAX6916" + help + If you say yes here you will get support for the + Maxim MAX6916 SPI RTC chip. + + This driver only supports the RTC feature, and not other chip + features such as alarms. + + This driver can also be built as a module. If so, the module + will be called rtc-max6916. + config RTC_DRV_R9701 tristate "Epson RTC-9701JE" help @@ -795,8 +811,9 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style 'CMOS'" - depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 + depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 || MN10300 default y if X86 + select RTC_MC146818_LIB help Say "yes" here to get direct support for the real time clock found in every PC or ACPI-based system, and some other boards. @@ -815,6 +832,7 @@ config RTC_DRV_CMOS config RTC_DRV_ALPHA bool "Alpha PC-style CMOS" depends on ALPHA + select RTC_MC146818_LIB default y help Direct support for the real-time clock found on every Alpha diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index ea2833723fa9..7cf7ad559c79 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_RTC_LIB) += rtc-lib.o obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o obj-$(CONFIG_RTC_SYSTOHC) += systohc.o obj-$(CONFIG_RTC_CLASS) += rtc-core.o +obj-$(CONFIG_RTC_MC146818_LIB) += rtc-mc146818-lib.o rtc-core-y := class.o interface.o ifdef CONFIG_RTC_DRV_EFI @@ -85,6 +86,7 @@ obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o +obj-$(CONFIG_RTC_DRV_MAX6916) += rtc-max6916.o obj-$(CONFIG_RTC_DRV_MAX77686) += rtc-max77686.o obj-$(CONFIG_RTC_DRV_MAX8907) += rtc-max8907.o obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 9ef5f6f89f98..84a52db9b05f 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -104,7 +104,17 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al else if (!rtc->ops->read_alarm) err = -EINVAL; else { - memset(alarm, 0, sizeof(struct rtc_wkalrm)); + alarm->enabled = 0; + alarm->pending = 0; + alarm->time.tm_sec = -1; + alarm->time.tm_min = -1; + alarm->time.tm_hour = -1; + alarm->time.tm_mday = -1; + alarm->time.tm_mon = -1; + alarm->time.tm_year = -1; + alarm->time.tm_wday = -1; + alarm->time.tm_yday = -1; + alarm->time.tm_isdst = -1; err = rtc->ops->read_alarm(rtc->dev.parent, alarm); } @@ -383,7 +393,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); rtc->aie_timer.period = ktime_set(0, 0); - /* Alarm has to be enabled & in the futrure for us to enqueue it */ + /* Alarm has to be enabled & in the future for us to enqueue it */ if (alarm->enabled && (rtc_tm_to_ktime(now).tv64 < rtc->aie_timer.node.expires.tv64)) { @@ -395,8 +405,6 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } EXPORT_SYMBOL_GPL(rtc_initialize_alarm); - - int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err = mutex_lock_interruptible(&rtc->ops_lock); @@ -748,9 +756,23 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_freq); */ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) { + struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); + struct rtc_time tm; + ktime_t now; + timer->enabled = 1; + __rtc_read_time(rtc, &tm); + now = rtc_tm_to_ktime(tm); + + /* Skip over expired timers */ + while (next) { + if (next->expires.tv64 >= now.tv64) + break; + next = timerqueue_iterate_next(next); + } + timerqueue_add(&rtc->timerqueue, &timer->node); - if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) { + if (!next) { struct rtc_wkalrm alarm; int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index ba0d61934d35..fea9a60b06cf 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -643,17 +643,15 @@ static int abx80x_probe(struct i2c_client *client, return err; } - err = devm_add_action(&client->dev, rtc_calib_remove_sysfs_group, - &client->dev); - if (err) { - rtc_calib_remove_sysfs_group(&client->dev); + err = devm_add_action_or_reset(&client->dev, + rtc_calib_remove_sysfs_group, + &client->dev); + if (err) dev_err(&client->dev, "Failed to add sysfs cleanup action: %d\n", err); - return err; - } - return 0; + return err; } static int abx80x_remove(struct i2c_client *client) diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c index 355fdb97a006..5219916ce11d 100644 --- a/drivers/rtc/rtc-asm9260.c +++ b/drivers/rtc/rtc-asm9260.c @@ -343,7 +343,6 @@ static struct platform_driver asm9260_rtc_driver = { .remove = asm9260_rtc_remove, .driver = { .name = "asm9260-rtc", - .owner = THIS_MODULE, .of_match_table = asm9260_dt_ids, }, }; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 99732e6f8c3b..7418a763ce52 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -375,6 +375,7 @@ static int at91_rtc_probe(struct platform_device *pdev) if (!rtc) return -ENOMEM; + spin_lock_init(&rtc->lock); rtc->irq = irq; /* platform setup code should have handled this; sigh */ diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index fbe9c72438e1..43745cac0141 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -43,7 +43,7 @@ #include <linux/of_platform.h> /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ -#include <asm-generic/rtc.h> +#include <linux/mc146818rtc.h> struct cmos_rtc { struct rtc_device *rtc; @@ -190,10 +190,10 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr) static int cmos_read_time(struct device *dev, struct rtc_time *t) { /* REVISIT: if the clock has a "century" register, use - * that instead of the heuristic in get_rtc_time(). + * that instead of the heuristic in mc146818_get_time(). * That'll make Y3K compatility (year > 2070) easy! */ - get_rtc_time(t); + mc146818_get_time(t); return 0; } @@ -205,7 +205,7 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t) * takes effect exactly 500ms after we write the register. * (Also queueing and other delays before we get this far.) */ - return set_rtc_time(t); + return mc146818_set_time(t); } static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) @@ -220,8 +220,6 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * Some also support day and month, for alarms up to a year in * the future. */ - t->time.tm_mday = -1; - t->time.tm_mon = -1; spin_lock_irq(&rtc_lock); t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM); @@ -272,7 +270,6 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) } } } - t->time.tm_year = -1; t->enabled = !!(rtc_control & RTC_AIE); t->pending = 0; @@ -630,7 +627,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) address_space = 64; #elif defined(__i386__) || defined(__x86_64__) || defined(__arm__) \ || defined(__sparc__) || defined(__mips__) \ - || defined(__powerpc__) + || defined(__powerpc__) || defined(CONFIG_MN10300) address_space = 128; #else #warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes. @@ -1142,14 +1139,14 @@ static __init void cmos_of_init(struct platform_device *pdev) if (val) CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT); - get_rtc_time(&time); + cmos_read_time(&pdev->dev, &time); ret = rtc_valid_tm(&time); if (ret) { struct rtc_time def_time = { .tm_year = 1, .tm_mday = 1, }; - set_rtc_time(&def_time); + cmos_set_time(&pdev->dev, &def_time); } } #else diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index a20bcf0e33cd..4273377562ec 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -85,6 +85,7 @@ static int da9052_read_alarm(struct da9052_rtc *rtc, struct rtc_time *rtc_tm) rtc_tm->tm_mday = v[0][2] & DA9052_RTC_DAY; rtc_tm->tm_hour = v[0][1] & DA9052_RTC_HOUR; rtc_tm->tm_min = v[0][0] & DA9052_RTC_MIN; + rtc_tm->tm_sec = 0; ret = rtc_valid_tm(rtc_tm); return ret; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 7ec0872d5e3b..678af8648c45 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -74,6 +74,7 @@ static int da9055_read_alarm(struct da9055 *da9055, struct rtc_time *rtc_tm) rtc_tm->tm_mday = v[2] & DA9055_RTC_ALM_DAY; rtc_tm->tm_hour = v[1] & DA9055_RTC_ALM_HOUR; rtc_tm->tm_min = v[0] & DA9055_RTC_ALM_MIN; + rtc_tm->tm_sec = 0; return rtc_valid_tm(rtc_tm); } diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index c5432bf64e1c..dba60c1dfce2 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -388,6 +388,8 @@ static int davinci_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) u8 day0, day1; unsigned long flags; + alm->time.tm_sec = 0; + spin_lock_irqsave(&davinci_rtc_lock, flags); davinci_rtcss_calendar_wait(davinci_rtc); diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c index 756e509f6ed2..ef75c349dff9 100644 --- a/drivers/rtc/rtc-ds1286.c +++ b/drivers/rtc/rtc-ds1286.c @@ -16,7 +16,7 @@ #include <linux/rtc.h> #include <linux/platform_device.h> #include <linux/bcd.h> -#include <linux/ds1286.h> +#include <linux/rtc/ds1286.h> #include <linux/io.h> #include <linux/slab.h> diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 8e41c4613e51..72b22935eb62 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -313,13 +313,6 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]); alm->time.tm_min = bcd2bin(buf[DS1305_MIN]); alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]); - alm->time.tm_mday = -1; - alm->time.tm_mon = -1; - alm->time.tm_year = -1; - /* next three fields are unused by Linux */ - alm->time.tm_wday = -1; - alm->time.tm_mday = -1; - alm->time.tm_isdst = -1; return 0; } diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 821d9c089cdb..8e1c5cb6ece6 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -482,11 +482,6 @@ static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_min = bcd2bin(ds1307->regs[1] & 0x7f); t->time.tm_hour = bcd2bin(ds1307->regs[2] & 0x3f); t->time.tm_mday = bcd2bin(ds1307->regs[3] & 0x3f); - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->time.tm_wday = -1; - t->time.tm_yday = -1; - t->time.tm_isdst = -1; /* ... and status */ t->enabled = !!(ds1307->regs[7] & DS1337_BIT_A1IE); @@ -602,6 +597,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = { * Alarm support for mcp794xx devices. */ +#define MCP794XX_REG_WEEKDAY 0x3 +#define MCP794XX_REG_WEEKDAY_WDAY_MASK 0x7 #define MCP794XX_REG_CONTROL 0x07 # define MCP794XX_BIT_ALM0_EN 0x10 # define MCP794XX_BIT_ALM1_EN 0x20 @@ -1231,13 +1228,16 @@ static int ds1307_probe(struct i2c_client *client, { struct ds1307 *ds1307; int err = -ENODEV; - int tmp; + int tmp, wday; struct chip_desc *chip = &chips[id->driver_data]; struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); bool want_irq = false; bool ds1307_can_wakeup_device = false; unsigned char *buf; struct ds1307_platform_data *pdata = dev_get_platdata(&client->dev); + struct rtc_time tm; + unsigned long timestamp; + irq_handler_t irq_handler = ds1307_irq; static const int bbsqi_bitpos[] = { @@ -1526,6 +1526,27 @@ read_rtc: bin2bcd(tmp)); } + /* + * Some IPs have weekday reset value = 0x1 which might not correct + * hence compute the wday using the current date/month/year values + */ + ds1307_get_time(&client->dev, &tm); + wday = tm.tm_wday; + timestamp = rtc_tm_to_time64(&tm); + rtc_time64_to_tm(timestamp, &tm); + + /* + * Check if reset wday is different from the computed wday + * If different then set the wday which we computed using + * timestamp + */ + if (wday != tm.tm_wday) { + wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY); + wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK; + wday = wday | (tm.tm_wday + 1); + i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday); + } + if (want_irq) { device_set_wakeup_capable(&client->dev, true); set_bit(HAS_ALARM, &ds1307->flags); diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index 23fa9f0cb5e3..895fbeeb47fe 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -504,12 +504,6 @@ static int ds1343_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->time.tm_hour = priv->alarm_hour < 0 ? 0 : priv->alarm_hour; alarm->time.tm_mday = priv->alarm_mday < 0 ? 0 : priv->alarm_mday; - alarm->time.tm_mon = -1; - alarm->time.tm_year = -1; - alarm->time.tm_wday = -1; - alarm->time.tm_yday = -1; - alarm->time.tm_isdst = -1; - out: mutex_unlock(&priv->mutex); return res; diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index b3ce3c652fcd..ed43b4311660 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -103,6 +103,26 @@ ds1685_rtc_bin2bcd(struct ds1685_priv *rtc, u8 val, u8 bin_mask, u8 bcd_mask) } /** + * s1685_rtc_check_mday - check validity of the day of month. + * @rtc: pointer to the ds1685 rtc structure. + * @mday: day of month. + * + * Returns -EDOM if the day of month is not within 1..31 range. + */ +static inline int +ds1685_rtc_check_mday(struct ds1685_priv *rtc, u8 mday) +{ + if (rtc->bcd_mode) { + if (mday < 0x01 || mday > 0x31 || (mday & 0x0f) > 0x09) + return -EDOM; + } else { + if (mday < 1 || mday > 31) + return -EDOM; + } + return 0; +} + +/** * ds1685_rtc_switch_to_bank0 - switch the rtc to bank 0. * @rtc: pointer to the ds1685 rtc structure. */ @@ -377,6 +397,7 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct ds1685_priv *rtc = platform_get_drvdata(pdev); u8 seconds, minutes, hours, mday, ctrlb, ctrlc; + int ret; /* Fetch the alarm info from the RTC alarm registers. */ ds1685_rtc_begin_data_access(rtc); @@ -388,34 +409,29 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) ctrlc = rtc->read(rtc, RTC_CTRL_C); ds1685_rtc_end_data_access(rtc); - /* Check month date. */ - if (!(mday >= 1) && (mday <= 31)) - return -EDOM; + /* Check the month date for validity. */ + ret = ds1685_rtc_check_mday(rtc, mday); + if (ret) + return ret; /* * Check the three alarm bytes. * * The Linux RTC system doesn't support the "don't care" capability * of this RTC chip. We check for it anyways in case support is - * added in the future. + * added in the future and only assign when we care. */ - if (unlikely(seconds >= 0xc0)) - alrm->time.tm_sec = -1; - else + if (likely(seconds < 0xc0)) alrm->time.tm_sec = ds1685_rtc_bcd2bin(rtc, seconds, RTC_SECS_BCD_MASK, RTC_SECS_BIN_MASK); - if (unlikely(minutes >= 0xc0)) - alrm->time.tm_min = -1; - else + if (likely(minutes < 0xc0)) alrm->time.tm_min = ds1685_rtc_bcd2bin(rtc, minutes, RTC_MINS_BCD_MASK, RTC_MINS_BIN_MASK); - if (unlikely(hours >= 0xc0)) - alrm->time.tm_hour = -1; - else + if (likely(hours < 0xc0)) alrm->time.tm_hour = ds1685_rtc_bcd2bin(rtc, hours, RTC_HRS_24_BCD_MASK, RTC_HRS_24_BIN_MASK); @@ -423,11 +439,6 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) /* Write the data to rtc_wkalrm. */ alrm->time.tm_mday = ds1685_rtc_bcd2bin(rtc, mday, RTC_MDAY_BCD_MASK, RTC_MDAY_BIN_MASK); - alrm->time.tm_mon = -1; - alrm->time.tm_year = -1; - alrm->time.tm_wday = -1; - alrm->time.tm_yday = -1; - alrm->time.tm_isdst = -1; alrm->enabled = !!(ctrlb & RTC_CTRL_B_AIE); alrm->pending = !!(ctrlc & RTC_CTRL_C_AF); @@ -445,6 +456,7 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct ds1685_priv *rtc = platform_get_drvdata(pdev); u8 ctrlb, seconds, minutes, hours, mday; + int ret; /* Fetch the alarm info and convert to BCD. */ seconds = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_sec, @@ -461,8 +473,9 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) RTC_MDAY_BCD_MASK); /* Check the month date for validity. */ - if (!(mday >= 1) && (mday <= 31)) - return -EDOM; + ret = ds1685_rtc_check_mday(rtc, mday); + if (ret) + return ret; /* * Check the three alarm bytes. diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index 16310fe79d76..9a1582ed7070 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -13,7 +13,7 @@ #include <linux/rtc.h> #include <linux/types.h> #include <linux/bcd.h> -#include <linux/rtc-ds2404.h> +#include <linux/platform_data/rtc-ds2404.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/slab.h> diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 04fbd7fffd0d..b1f20d8c358f 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -197,12 +197,6 @@ static int ds3232_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->time.tm_hour = bcd2bin(buf[2] & 0x7F); alarm->time.tm_mday = bcd2bin(buf[3] & 0x7F); - alarm->time.tm_mon = -1; - alarm->time.tm_year = -1; - alarm->time.tm_wday = -1; - alarm->time.tm_yday = -1; - alarm->time.tm_isdst = -1; - alarm->enabled = !!(control & DS3232_REG_CR_A1IE); alarm->pending = !!(stat & DS3232_REG_SR_A1F); diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 96d38609d803..0130afd7fe88 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -259,6 +259,12 @@ static const struct rtc_class_ops efi_rtc_ops = { static int __init efi_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; + efi_time_t eft; + efi_time_cap_t cap; + + /* First check if the RTC is usable */ + if (efi.get_time(&eft, &cap) != EFI_SUCCESS) + return -ENODEV; rtc = devm_rtc_device_register(&dev->dev, "rtc-efi", &efi_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c index d726c6aa96a8..1bf5d2347928 100644 --- a/drivers/rtc/rtc-generic.c +++ b/drivers/rtc/rtc-generic.c @@ -9,44 +9,10 @@ #include <linux/platform_device.h> #include <linux/rtc.h> -#if defined(CONFIG_M68K) || defined(CONFIG_PARISC) || \ - defined(CONFIG_PPC) || defined(CONFIG_SUPERH32) -#include <asm/rtc.h> - -static int generic_get_time(struct device *dev, struct rtc_time *tm) -{ - unsigned int ret = get_rtc_time(tm); - - if (ret & RTC_BATT_BAD) - return -EOPNOTSUPP; - - return rtc_valid_tm(tm); -} - -static int generic_set_time(struct device *dev, struct rtc_time *tm) -{ - if (set_rtc_time(tm) < 0) - return -EOPNOTSUPP; - - return 0; -} - -static const struct rtc_class_ops generic_rtc_ops = { - .read_time = generic_get_time, - .set_time = generic_set_time, -}; -#else -#define generic_rtc_ops *(struct rtc_class_ops*)NULL -#endif - static int __init generic_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - const struct rtc_class_ops *ops; - - ops = dev_get_platdata(&dev->dev); - if (!ops) - ops = &generic_rtc_ops; + const struct rtc_class_ops *ops = dev_get_platdata(&dev->dev); rtc = devm_rtc_device_register(&dev->dev, "rtc-generic", ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 207270376b55..e5ad527cb75e 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -198,7 +198,7 @@ static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) return ret; /* The alarm only has a minute accuracy */ - alm_tm->tm_sec = -1; + alm_tm->tm_sec = 0; alm_tm->tm_min = (buf[0] & HYM8563_ALM_BIT_DISABLE) ? -1 : @@ -213,9 +213,6 @@ static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) -1 : bcd2bin(buf[3] & HYM8563_WEEKDAY_MASK); - alm_tm->tm_mon = -1; - alm_tm->tm_year = -1; - ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2); if (ret < 0) return ret; diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index 54328d4ac0d3..0e7f0f52bfe4 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -245,8 +245,7 @@ static int isl12057_rtc_update_alarm(struct device *dev, int enable) static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); - struct rtc_time rtc_tm, *alarm_tm = &alarm->time; - unsigned long rtc_secs, alarm_secs; + struct rtc_time *alarm_tm = &alarm->time; u8 regs[ISL12057_A1_SEC_LEN]; unsigned int ir; int ret; @@ -264,36 +263,6 @@ static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f); alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f); alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f); - alarm_tm->tm_wday = -1; - - /* - * The alarm section does not store year/month. We use the ones in rtc - * section as a basis and increment month and then year if needed to get - * alarm after current time. - */ - ret = _isl12057_rtc_read_time(dev, &rtc_tm); - if (ret) - goto err_unlock; - - alarm_tm->tm_year = rtc_tm.tm_year; - alarm_tm->tm_mon = rtc_tm.tm_mon; - - ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); - if (ret) - goto err_unlock; - - ret = rtc_tm_to_time(alarm_tm, &alarm_secs); - if (ret) - goto err_unlock; - - if (alarm_secs < rtc_secs) { - if (alarm_tm->tm_mon == 11) { - alarm_tm->tm_mon = 0; - alarm_tm->tm_year += 1; - } else { - alarm_tm->tm_mon += 1; - } - } ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir); if (ret) { diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index d1bf93a87200..58698d21c2c3 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -244,7 +244,7 @@ static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled) retval = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, flags); if (retval < 0) { - dev_info(dev, "Unable to enable alarm IRQ %d\n", retval); + dev_err(dev, "Unable to enable alarm IRQ %d\n", retval); return retval; } return 0; @@ -320,10 +320,8 @@ static int m41t80_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_sec = bcd2bin(alarmvals[4] & 0x7f); alrm->time.tm_min = bcd2bin(alarmvals[3] & 0x7f); alrm->time.tm_hour = bcd2bin(alarmvals[2] & 0x3f); - alrm->time.tm_wday = -1; alrm->time.tm_mday = bcd2bin(alarmvals[1] & 0x3f); alrm->time.tm_mon = bcd2bin(alarmvals[0] & 0x3f); - alrm->time.tm_year = -1; alrm->enabled = !!(alarmvals[0] & M41T80_ALMON_AFE); alrm->pending = (flags & M41T80_FLAGS_AF) && alrm->enabled; @@ -337,6 +335,30 @@ static struct rtc_class_ops m41t80_rtc_ops = { .proc = m41t80_rtc_proc, }; +#ifdef CONFIG_PM_SLEEP +static int m41t80_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq >= 0 && device_may_wakeup(dev)) + enable_irq_wake(client->irq); + + return 0; +} + +static int m41t80_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq >= 0 && device_may_wakeup(dev)) + disable_irq_wake(client->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(m41t80_pm, m41t80_suspend, m41t80_resume); + static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -831,10 +853,9 @@ static int m41t80_probe(struct i2c_client *client, return rc; } - rc = devm_add_action(&client->dev, m41t80_remove_sysfs_group, - &client->dev); + rc = devm_add_action_or_reset(&client->dev, m41t80_remove_sysfs_group, + &client->dev); if (rc) { - m41t80_remove_sysfs_group(&client->dev); dev_err(&client->dev, "Failed to add sysfs cleanup action: %d\n", rc); return rc; @@ -873,6 +894,7 @@ static int m41t80_remove(struct i2c_client *client) static struct i2c_driver m41t80_driver = { .driver = { .name = "rtc-m41t80", + .pm = &m41t80_pm, }, .probe = m41t80_probe, .remove = m41t80_remove, diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index f72b91f2501f..0eeb5714c00f 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/rtc.h> #include <linux/platform_device.h> -#include <linux/m48t86.h> +#include <linux/platform_data/rtc-m48t86.h> #include <linux/bcd.h> #define M48T86_REG_SEC 0x00 diff --git a/drivers/rtc/rtc-max6916.c b/drivers/rtc/rtc-max6916.c new file mode 100644 index 000000000000..623ab27b2757 --- /dev/null +++ b/drivers/rtc/rtc-max6916.c @@ -0,0 +1,164 @@ +/* rtc-max6916.c + * + * Driver for MAXIM max6916 Low Current, SPI Compatible + * Real Time Clock + * + * Author : Venkat Prashanth B U <venkat.prashanth2498@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/spi/spi.h> +#include <linux/bcd.h> + +/* Registers in max6916 rtc */ + +#define MAX6916_SECONDS_REG 0x01 +#define MAX6916_MINUTES_REG 0x02 +#define MAX6916_HOURS_REG 0x03 +#define MAX6916_DATE_REG 0x04 +#define MAX6916_MONTH_REG 0x05 +#define MAX6916_DAY_REG 0x06 +#define MAX6916_YEAR_REG 0x07 +#define MAX6916_CONTROL_REG 0x08 +#define MAX6916_STATUS_REG 0x0C +#define MAX6916_CLOCK_BURST 0x3F + +static int max6916_read_reg(struct device *dev, unsigned char address, + unsigned char *data) +{ + struct spi_device *spi = to_spi_device(dev); + + *data = address | 0x80; + + return spi_write_then_read(spi, data, 1, data, 1); +} + +static int max6916_write_reg(struct device *dev, unsigned char address, + unsigned char data) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char buf[2]; + + buf[0] = address & 0x7F; + buf[1] = data; + + return spi_write_then_read(spi, buf, 2, NULL, 0); +} + +static int max6916_read_time(struct device *dev, struct rtc_time *dt) +{ + struct spi_device *spi = to_spi_device(dev); + int err; + unsigned char buf[8]; + + buf[0] = MAX6916_CLOCK_BURST | 0x80; + + err = spi_write_then_read(spi, buf, 1, buf, 8); + + if (err) + return err; + + dt->tm_sec = bcd2bin(buf[0]); + dt->tm_min = bcd2bin(buf[1]); + dt->tm_hour = bcd2bin(buf[2] & 0x3F); + dt->tm_mday = bcd2bin(buf[3]); + dt->tm_mon = bcd2bin(buf[4]) - 1; + dt->tm_wday = bcd2bin(buf[5]) - 1; + dt->tm_year = bcd2bin(buf[6]) + 100; + + return rtc_valid_tm(dt); +} + +static int max6916_set_time(struct device *dev, struct rtc_time *dt) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char buf[9]; + + if (dt->tm_year < 100 || dt->tm_year > 199) { + dev_err(&spi->dev, "Year must be between 2000 and 2099. It's %d.\n", + dt->tm_year + 1900); + return -EINVAL; + } + + buf[0] = MAX6916_CLOCK_BURST & 0x7F; + buf[1] = bin2bcd(dt->tm_sec); + buf[2] = bin2bcd(dt->tm_min); + buf[3] = (bin2bcd(dt->tm_hour) & 0X3F); + buf[4] = bin2bcd(dt->tm_mday); + buf[5] = bin2bcd(dt->tm_mon + 1); + buf[6] = bin2bcd(dt->tm_wday + 1); + buf[7] = bin2bcd(dt->tm_year % 100); + buf[8] = bin2bcd(0x00); + + /* write the rtc settings */ + return spi_write_then_read(spi, buf, 9, NULL, 0); +} + +static const struct rtc_class_ops max6916_rtc_ops = { + .read_time = max6916_read_time, + .set_time = max6916_set_time, +}; + +static int max6916_probe(struct spi_device *spi) +{ + struct rtc_device *rtc; + unsigned char data; + int res; + + /* spi setup with max6916 in mode 3 and bits per word as 8 */ + spi->mode = SPI_MODE_3; + spi->bits_per_word = 8; + spi_setup(spi); + + /* RTC Settings */ + res = max6916_read_reg(&spi->dev, MAX6916_SECONDS_REG, &data); + if (res) + return res; + + /* Disable the write protect of rtc */ + max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data); + data = data & ~(1 << 7); + max6916_write_reg(&spi->dev, MAX6916_CONTROL_REG, data); + + /*Enable oscillator,disable oscillator stop flag, glitch filter*/ + max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data); + data = data & 0x1B; + max6916_write_reg(&spi->dev, MAX6916_STATUS_REG, data); + + /* display the settings */ + max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data); + dev_info(&spi->dev, "MAX6916 RTC CTRL Reg = 0x%02x\n", data); + + max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data); + dev_info(&spi->dev, "MAX6916 RTC Status Reg = 0x%02x\n", data); + + rtc = devm_rtc_device_register(&spi->dev, "max6916", + &max6916_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + spi_set_drvdata(spi, rtc); + + return 0; +} + +static struct spi_driver max6916_driver = { + .driver = { + .name = "max6916", + }, + .probe = max6916_probe, +}; +module_spi_driver(max6916_driver); + +MODULE_DESCRIPTION("MAX6916 SPI RTC DRIVER"); +MODULE_AUTHOR("Venkat Prashanth B U <venkat.prashanth2498@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c new file mode 100644 index 000000000000..2f1772a358ca --- /dev/null +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -0,0 +1,198 @@ +#include <linux/bcd.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/mc146818rtc.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#endif + +/* + * Returns true if a clock update is in progress + */ +static inline unsigned char mc146818_is_updating(void) +{ + unsigned char uip; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + spin_unlock_irqrestore(&rtc_lock, flags); + return uip; +} + +unsigned int mc146818_get_time(struct rtc_time *time) +{ + unsigned char ctrl; + unsigned long flags; + unsigned char century = 0; + +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_year; +#endif + + /* + * read RTC once any update in progress is done. The update + * can take just over 2ms. We wait 20ms. There is no need to + * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. + * If you need to know *exactly* when a second has started, enable + * periodic update complete interrupts, (via ioctl) and then + * immediately read /dev/rtc which will block until you get the IRQ. + * Once the read clears, read the RTC time (again via ioctl). Easy. + */ + if (mc146818_is_updating()) + mdelay(20); + + /* + * Only the values that we read from the RTC are set. We leave + * tm_wday, tm_yday and tm_isdst untouched. Even though the + * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated + * by the RTC when initially set to a non-zero value. + */ + spin_lock_irqsave(&rtc_lock, flags); + time->tm_sec = CMOS_READ(RTC_SECONDS); + time->tm_min = CMOS_READ(RTC_MINUTES); + time->tm_hour = CMOS_READ(RTC_HOURS); + time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); + time->tm_mon = CMOS_READ(RTC_MONTH); + time->tm_year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_MACH_DECSTATION + real_year = CMOS_READ(RTC_DEC_YEAR); +#endif +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif + ctrl = CMOS_READ(RTC_CONTROL); + spin_unlock_irqrestore(&rtc_lock, flags); + + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + time->tm_sec = bcd2bin(time->tm_sec); + time->tm_min = bcd2bin(time->tm_min); + time->tm_hour = bcd2bin(time->tm_hour); + time->tm_mday = bcd2bin(time->tm_mday); + time->tm_mon = bcd2bin(time->tm_mon); + time->tm_year = bcd2bin(time->tm_year); + century = bcd2bin(century); + } + +#ifdef CONFIG_MACH_DECSTATION + time->tm_year += real_year - 72; +#endif + + if (century) + time->tm_year += (century - 19) * 100; + + /* + * Account for differences between how the RTC uses the values + * and how they are defined in a struct rtc_time; + */ + if (time->tm_year <= 69) + time->tm_year += 100; + + time->tm_mon--; + + return RTC_24H; +} +EXPORT_SYMBOL_GPL(mc146818_get_time); + +/* Set the current date and time in the real time clock. */ +int mc146818_set_time(struct rtc_time *time) +{ + unsigned long flags; + unsigned char mon, day, hrs, min, sec; + unsigned char save_control, save_freq_select; + unsigned int yrs; +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_yrs, leap_yr; +#endif + unsigned char century = 0; + + yrs = time->tm_year; + mon = time->tm_mon + 1; /* tm_mon starts at zero */ + day = time->tm_mday; + hrs = time->tm_hour; + min = time->tm_min; + sec = time->tm_sec; + + if (yrs > 255) /* They are unsigned */ + return -EINVAL; + + spin_lock_irqsave(&rtc_lock, flags); +#ifdef CONFIG_MACH_DECSTATION + real_yrs = yrs; + leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || + !((yrs + 1900) % 400)); + yrs = 72; + + /* + * We want to keep the year set to 73 until March + * for non-leap years, so that Feb, 29th is handled + * correctly. + */ + if (!leap_yr && mon < 3) { + real_yrs--; + yrs = 73; + } +#endif + +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) { + century = (yrs + 1900) / 100; + yrs %= 100; + } +#endif + + /* These limits and adjustments are independent of + * whether the chip is in binary mode or not. + */ + if (yrs > 169) { + spin_unlock_irqrestore(&rtc_lock, flags); + return -EINVAL; + } + + if (yrs >= 100) + yrs -= 100; + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + sec = bin2bcd(sec); + min = bin2bcd(min); + hrs = bin2bcd(hrs); + day = bin2bcd(day); + mon = bin2bcd(mon); + yrs = bin2bcd(yrs); + century = bin2bcd(century); + } + + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + +#ifdef CONFIG_MACH_DECSTATION + CMOS_WRITE(real_yrs, RTC_DEC_YEAR); +#endif + CMOS_WRITE(yrs, RTC_YEAR); + CMOS_WRITE(mon, RTC_MONTH); + CMOS_WRITE(day, RTC_DAY_OF_MONTH); + CMOS_WRITE(hrs, RTC_HOURS); + CMOS_WRITE(min, RTC_MINUTES); + CMOS_WRITE(sec, RTC_SECONDS); +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + CMOS_WRITE(century, acpi_gbl_FADT.century); +#endif + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(mc146818_set_time); diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index 0094d9bdd1e6..7334c44fa7c3 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -32,11 +32,11 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/kernel.h> +#include <linux/mc146818rtc.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sfi.h> -#include <asm-generic/rtc.h> #include <asm/intel_scu_ipc.h> #include <asm/intel-mid.h> #include <asm/intel_mid_vrtc.h> @@ -149,14 +149,6 @@ static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t) if (mrst->irq <= 0) return -EIO; - /* Basic alarms only support hour, minute, and seconds fields. - * Some also support day and month, for alarms up to a year in - * the future. - */ - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - /* vRTC only supports binary mode */ spin_lock_irq(&rtc_lock); t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM); diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index f22e060709e5..b4478cc92b55 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -96,7 +96,7 @@ #define CD_TMR_TE BIT(3) /* Countdown timer enable */ /* PCF2123_REG_OFFSET BITS */ -#define OFFSET_SIGN_BIT BIT(6) /* 2's complement sign bit */ +#define OFFSET_SIGN_BIT 6 /* 2's complement sign bit */ #define OFFSET_COARSE BIT(7) /* Coarse mode offset */ #define OFFSET_STEP (2170) /* Offset step in parts per billion */ @@ -217,7 +217,7 @@ static int pcf2123_read_offset(struct device *dev, long *offset) if (reg & OFFSET_COARSE) reg <<= 1; /* multiply by 2 and sign extend */ else - reg |= (reg & OFFSET_SIGN_BIT) << 1; /* sign extend only */ + reg = sign_extend32(reg, OFFSET_SIGN_BIT); *offset = ((long)reg) * OFFSET_STEP; diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e8ddbb359d11..efb0a08ac117 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -16,6 +16,16 @@ #include <linux/rtc.h> #include <linux/module.h> +/* + * Information for this driver was pulled from the following datasheets. + * + * http://www.nxp.com/documents/data_sheet/PCF85063A.pdf + * http://www.nxp.com/documents/data_sheet/PCF85063TP.pdf + * + * PCF85063A -- Rev. 6 — 18 November 2015 + * PCF85063TP -- Rev. 4 — 6 May 2015 +*/ + #define PCF85063_REG_CTRL1 0x00 /* status */ #define PCF85063_REG_CTRL1_STOP BIT(5) #define PCF85063_REG_CTRL2 0x01 @@ -55,10 +65,22 @@ static int pcf85063_stop_clock(struct i2c_client *client, u8 *ctrl1) return 0; } -/* - * In the routines that deal directly with the pcf85063 hardware, we use - * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. - */ +static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1) +{ + s32 ret; + + /* start the clock */ + ctrl1 &= PCF85063_REG_CTRL1_STOP; + + ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1); + if (ret < 0) { + dev_err(&client->dev, "Failing to start the clock\n"); + return -EIO; + } + + return 0; +} + static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) { int rc; @@ -90,8 +112,7 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) tm->tm_wday = regs[4] & 0x07; tm->tm_mon = bcd2bin(regs[5] & 0x1F) - 1; /* rtc mn 1-12 */ tm->tm_year = bcd2bin(regs[6]); - if (tm->tm_year < 70) - tm->tm_year += 100; /* assume we are in 1970...2069 */ + tm->tm_year += 100; return rtc_valid_tm(tm); } @@ -99,13 +120,17 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) { int rc; - u8 regs[8]; + u8 regs[7]; + u8 ctrl1; + + if ((tm->tm_year < 100) || (tm->tm_year > 199)) + return -EINVAL; /* * to accurately set the time, reset the divider chain and keep it in * reset state until all time/date registers are written */ - rc = pcf85063_stop_clock(client, ®s[7]); + rc = pcf85063_stop_clock(client, &ctrl1); if (rc != 0) return rc; @@ -125,14 +150,7 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) regs[5] = bin2bcd(tm->tm_mon + 1); /* year and century */ - regs[6] = bin2bcd(tm->tm_year % 100); - - /* - * after all time/date registers are written, let the 'address auto - * increment' feature wrap around and write register CTRL1 to re-enable - * the clock divider chain again - */ - regs[7] &= ~PCF85063_REG_CTRL1_STOP; + regs[6] = bin2bcd(tm->tm_year - 100); /* write all registers at once */ rc = i2c_smbus_write_i2c_block_data(client, PCF85063_REG_SC, @@ -142,6 +160,15 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) return rc; } + /* + * Write the control register as a separate action since the size of + * the register space is different between the PCF85063TP and + * PCF85063A devices. The rollover point can not be used. + */ + rc = pcf85063_start_clock(client, ctrl1); + if (rc != 0) + return rc; + return 0; } diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index b9ddbb001283..1227ceab61ee 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -341,14 +341,11 @@ static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm) "%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n", __func__, buf[0], buf[1], buf[2], buf[3]); + tm->time.tm_sec = 0; tm->time.tm_min = bcd2bin(buf[0] & 0x7F); tm->time.tm_hour = bcd2bin(buf[1] & 0x3F); tm->time.tm_mday = bcd2bin(buf[2] & 0x3F); tm->time.tm_wday = bcd2bin(buf[3] & 0x7); - tm->time.tm_mon = -1; - tm->time.tm_year = -1; - tm->time.tm_yday = -1; - tm->time.tm_isdst = -1; err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending); if (err < 0) diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index f28d57788951..68ce77414bdc 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -128,6 +128,7 @@ static int rc5t583_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) return ret; } + alm->time.tm_sec = 0; alm->time.tm_min = bcd2bin(alarm_data[0]); alm->time.tm_hour = bcd2bin(alarm_data[1]); alm->time.tm_mday = bcd2bin(alarm_data[2]); diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index ef86229428fc..c8c757466783 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -341,12 +341,6 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_sec = 0; t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f); t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]); - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->time.tm_wday = -1; - t->time.tm_yday = -1; - t->time.tm_isdst = -1; /* ... and status */ t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE); diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index f623038e586e..9a2f6a95d5a7 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -13,12 +13,15 @@ #include <linux/bcd.h> #include <linux/bitops.h> +#include <linux/log2.h> #include <linux/i2c.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtc.h> +#define RV8803_I2C_TRY_COUNT 4 + #define RV8803_SEC 0x00 #define RV8803_MIN 0x01 #define RV8803_HOUR 0x02 @@ -56,19 +59,85 @@ struct rv8803_data { u8 ctrl; }; +static int rv8803_read_reg(const struct i2c_client *client, u8 reg) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + /* + * There is a 61µs window during which the RTC does not acknowledge I2C + * transfers. In that case, ensure that there are multiple attempts. + */ + do + ret = i2c_smbus_read_byte_data(client, reg); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret < 0) + dev_err(&client->dev, "Unable to read register 0x%02x\n", reg); + + return ret; +} + +static int rv8803_read_regs(const struct i2c_client *client, + u8 reg, u8 count, u8 *values) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_read_i2c_block_data(client, reg, count, values); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret != count) { + dev_err(&client->dev, + "Unable to read registers 0x%02x..0x%02x\n", + reg, reg + count - 1); + return ret < 0 ? ret : -EIO; + } + + return 0; +} + +static int rv8803_write_reg(const struct i2c_client *client, u8 reg, u8 value) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_write_byte_data(client, reg, value); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret) + dev_err(&client->dev, "Unable to write register 0x%02x\n", reg); + + return ret; +} + +static int rv8803_write_regs(const struct i2c_client *client, + u8 reg, u8 count, const u8 *values) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_write_i2c_block_data(client, reg, count, + values); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret) + dev_err(&client->dev, + "Unable to write registers 0x%02x..0x%02x\n", + reg, reg + count - 1); + + return ret; +} + static irqreturn_t rv8803_handle_irq(int irq, void *dev_id) { struct i2c_client *client = dev_id; struct rv8803_data *rv8803 = i2c_get_clientdata(client); unsigned long events = 0; - int flags, try = 0; + int flags; mutex_lock(&rv8803->flags_lock); - do { - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); - try++; - } while ((flags == -ENXIO) && (try < 3)); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags <= 0) { mutex_unlock(&rv8803->flags_lock); return IRQ_NONE; @@ -100,9 +169,8 @@ static irqreturn_t rv8803_handle_irq(int irq, void *dev_id) if (events) { rtc_update_irq(rv8803->rtc, 1, events); - i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); - i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + rv8803_write_reg(client, RV8803_FLAG, flags); + rv8803_write_reg(rv8803->client, RV8803_CTRL, rv8803->ctrl); } mutex_unlock(&rv8803->flags_lock); @@ -118,7 +186,7 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) u8 *date = date1; int ret, flags; - flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG); + flags = rv8803_read_reg(rv8803->client, RV8803_FLAG); if (flags < 0) return flags; @@ -127,16 +195,14 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) return -EINVAL; } - ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date); - if (ret != 7) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date); + if (ret) + return ret; if ((date1[RV8803_SEC] & 0x7f) == bin2bcd(59)) { - ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date2); - if (ret != 7) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date2); + if (ret) + return ret; if ((date2[RV8803_SEC] & 0x7f) != bin2bcd(59)) date = date2; @@ -145,23 +211,33 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) tm->tm_sec = bcd2bin(date[RV8803_SEC] & 0x7f); tm->tm_min = bcd2bin(date[RV8803_MIN] & 0x7f); tm->tm_hour = bcd2bin(date[RV8803_HOUR] & 0x3f); - tm->tm_wday = ffs(date[RV8803_WEEK] & 0x7f); + tm->tm_wday = ilog2(date[RV8803_WEEK] & 0x7f); tm->tm_mday = bcd2bin(date[RV8803_DAY] & 0x3f); tm->tm_mon = bcd2bin(date[RV8803_MONTH] & 0x1f) - 1; tm->tm_year = bcd2bin(date[RV8803_YEAR]) + 100; - return rtc_valid_tm(tm); + return 0; } static int rv8803_set_time(struct device *dev, struct rtc_time *tm) { struct rv8803_data *rv8803 = dev_get_drvdata(dev); u8 date[7]; - int flags, ret; + int ctrl, flags, ret; if ((tm->tm_year < 100) || (tm->tm_year > 199)) return -EINVAL; + ctrl = rv8803_read_reg(rv8803->client, RV8803_CTRL); + if (ctrl < 0) + return ctrl; + + /* Stop the clock */ + ret = rv8803_write_reg(rv8803->client, RV8803_CTRL, + ctrl | RV8803_CTRL_RESET); + if (ret) + return ret; + date[RV8803_SEC] = bin2bcd(tm->tm_sec); date[RV8803_MIN] = bin2bcd(tm->tm_min); date[RV8803_HOUR] = bin2bcd(tm->tm_hour); @@ -170,21 +246,26 @@ static int rv8803_set_time(struct device *dev, struct rtc_time *tm) date[RV8803_MONTH] = bin2bcd(tm->tm_mon + 1); date[RV8803_YEAR] = bin2bcd(tm->tm_year - 100); - ret = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date); - if (ret < 0) + ret = rv8803_write_regs(rv8803->client, RV8803_SEC, 7, date); + if (ret) + return ret; + + /* Restart the clock */ + ret = rv8803_write_reg(rv8803->client, RV8803_CTRL, + ctrl & ~RV8803_CTRL_RESET); + if (ret) return ret; mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG); + flags = rv8803_read_reg(rv8803->client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } - ret = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG, - flags & ~RV8803_FLAG_V2F); + ret = rv8803_write_reg(rv8803->client, RV8803_FLAG, + flags & ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F)); mutex_unlock(&rv8803->flags_lock); @@ -198,22 +279,18 @@ static int rv8803_get_alarm(struct device *dev, struct rtc_wkalrm *alrm) u8 alarmvals[3]; int flags, ret; - ret = i2c_smbus_read_i2c_block_data(client, RV8803_ALARM_MIN, - 3, alarmvals); - if (ret != 3) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(client, RV8803_ALARM_MIN, 3, alarmvals); + if (ret) + return ret; - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; alrm->time.tm_sec = 0; alrm->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); - alrm->time.tm_wday = -1; alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f); - alrm->time.tm_mon = -1; - alrm->time.tm_year = -1; alrm->enabled = !!(rv8803->ctrl & RV8803_CTRL_AIE); alrm->pending = (flags & RV8803_FLAG_AF) && alrm->enabled; @@ -239,10 +316,10 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) mutex_lock(&rv8803->flags_lock); - ret = i2c_smbus_read_i2c_block_data(client, RV8803_FLAG, 2, ctrl); - if (ret != 2) { + ret = rv8803_read_regs(client, RV8803_FLAG, 2, ctrl); + if (ret) { mutex_unlock(&rv8803->flags_lock); - return ret < 0 ? ret : -EIO; + return ret; } alarmvals[0] = bin2bcd(alrm->time.tm_min); @@ -251,8 +328,8 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (rv8803->ctrl & (RV8803_CTRL_AIE | RV8803_CTRL_UIE)) { rv8803->ctrl &= ~(RV8803_CTRL_AIE | RV8803_CTRL_UIE); - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(rv8803->client, RV8803_CTRL, + rv8803->ctrl); if (err) { mutex_unlock(&rv8803->flags_lock); return err; @@ -260,13 +337,12 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) } ctrl[1] &= ~RV8803_FLAG_AF; - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG, ctrl[1]); + err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[1]); mutex_unlock(&rv8803->flags_lock); if (err) return err; - err = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_ALARM_MIN, - 3, alarmvals); + err = rv8803_write_regs(rv8803->client, RV8803_ALARM_MIN, 3, alarmvals); if (err) return err; @@ -276,8 +352,8 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (rv8803->rtc->aie_timer.enabled) rv8803->ctrl |= RV8803_CTRL_AIE; - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(rv8803->client, RV8803_CTRL, + rv8803->ctrl); if (err) return err; } @@ -306,21 +382,20 @@ static int rv8803_alarm_irq_enable(struct device *dev, unsigned int enabled) } mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } flags &= ~(RV8803_FLAG_AF | RV8803_FLAG_UF); - err = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); + err = rv8803_write_reg(client, RV8803_FLAG, flags); mutex_unlock(&rv8803->flags_lock); if (err) return err; if (ctrl != rv8803->ctrl) { rv8803->ctrl = ctrl; - err = i2c_smbus_write_byte_data(client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(client, RV8803_CTRL, rv8803->ctrl); if (err) return err; } @@ -336,7 +411,7 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) switch (cmd) { case RTC_VL_READ: - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; @@ -355,16 +430,16 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) case RTC_VL_CLR: mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } flags &= ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F); - ret = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); + ret = rv8803_write_reg(client, RV8803_FLAG, flags); mutex_unlock(&rv8803->flags_lock); - if (ret < 0) + if (ret) return ret; return 0; @@ -382,8 +457,8 @@ static ssize_t rv8803_nvram_write(struct file *filp, struct kobject *kobj, struct i2c_client *client = to_i2c_client(dev); int ret; - ret = i2c_smbus_write_byte_data(client, RV8803_RAM, buf[0]); - if (ret < 0) + ret = rv8803_write_reg(client, RV8803_RAM, buf[0]); + if (ret) return ret; return 1; @@ -397,7 +472,7 @@ static ssize_t rv8803_nvram_read(struct file *filp, struct kobject *kobj, struct i2c_client *client = to_i2c_client(dev); int ret; - ret = i2c_smbus_read_byte_data(client, RV8803_RAM); + ret = rv8803_read_reg(client, RV8803_RAM); if (ret < 0) return ret; @@ -427,7 +502,7 @@ static int rv8803_probe(struct i2c_client *client, { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); struct rv8803_data *rv8803; - int err, flags, try = 0; + int err, flags; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_I2C_BLOCK)) { @@ -444,16 +519,7 @@ static int rv8803_probe(struct i2c_client *client, rv8803->client = client; i2c_set_clientdata(client, rv8803); - /* - * There is a 60µs window where the RTC may not reply on the i2c bus in - * that case, the transfer is not ACKed. In that case, ensure there are - * multiple attempts. - */ - do { - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); - try++; - } while ((flags == -ENXIO) && (try < 3)); - + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; @@ -488,12 +554,7 @@ static int rv8803_probe(struct i2c_client *client, return PTR_ERR(rv8803->rtc); } - try = 0; - do { - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT, - RV8803_EXT_WADA); - try++; - } while ((err == -ENXIO) && (try < 3)); + err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA); if (err) return err; diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 772d221ec2d9..7163b91bb773 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -272,15 +272,9 @@ static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); t->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); - if (alarmvals[2] & RX8010_ALARM_AE) - t->time.tm_mday = -1; - else + if (!(alarmvals[2] & RX8010_ALARM_AE)) t->time.tm_mday = bcd2bin(alarmvals[2] & 0x7f); - t->time.tm_wday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->enabled = !!(rx8010->ctrlreg & RX8010_CTRL_AIE); t->pending = (flagreg & RX8010_FLAG_AF) && t->enabled; diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 9f105efbc546..2b85cc7a24e7 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -319,11 +319,6 @@ static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12 + (ald[1] & 0x20 ? 12 : 0); - t->time.tm_wday = -1; - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - dev_dbg(dev, "%s: date: %ds %dm %dh %dmd %dm %dy\n", __func__, t->time.tm_sec, t->time.tm_min, t->time.tm_hour, diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index f40afdd0e5f5..5dab4665ca3b 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -15,6 +15,7 @@ #include <linux/bitrev.h> #include <linux/bcd.h> #include <linux/slab.h> +#include <linux/delay.h> #define S35390A_CMD_STATUS1 0 #define S35390A_CMD_STATUS2 1 @@ -34,10 +35,14 @@ #define S35390A_ALRM_BYTE_HOURS 1 #define S35390A_ALRM_BYTE_MINS 2 +/* flags for STATUS1 */ #define S35390A_FLAG_POC 0x01 #define S35390A_FLAG_BLD 0x02 +#define S35390A_FLAG_INT2 0x04 #define S35390A_FLAG_24H 0x40 #define S35390A_FLAG_RESET 0x80 + +/* flag for STATUS2 */ #define S35390A_FLAG_TEST 0x01 #define S35390A_INT2_MODE_MASK 0xF0 @@ -94,19 +99,63 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) return 0; } -static int s35390a_reset(struct s35390a *s35390a) +/* + * Returns <0 on error, 0 if rtc is setup fine and 1 if the chip was reset. + * To keep the information if an irq is pending, pass the value read from + * STATUS1 to the caller. + */ +static int s35390a_reset(struct s35390a *s35390a, char *status1) { - char buf[1]; - - if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0) - return -EIO; - - if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD))) + char buf; + int ret; + unsigned initcount = 0; + + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, status1, 1); + if (ret < 0) + return ret; + + if (*status1 & S35390A_FLAG_POC) + /* + * Do not communicate for 0.5 seconds since the power-on + * detection circuit is in operation. + */ + msleep(500); + else if (!(*status1 & S35390A_FLAG_BLD)) + /* + * If both POC and BLD are unset everything is fine. + */ return 0; - buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H); - buf[0] &= 0xf0; - return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); + /* + * At least one of POC and BLD are set, so reinitialise chip. Keeping + * this information in the hardware to know later that the time isn't + * valid is unfortunately not possible because POC and BLD are cleared + * on read. So the reset is best done now. + * + * The 24H bit is kept over reset, so set it already here. + */ +initialize: + *status1 = S35390A_FLAG_24H; + buf = S35390A_FLAG_RESET | S35390A_FLAG_24H; + ret = s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + + if (ret < 0) + return ret; + + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + if (ret < 0) + return ret; + + if (buf & (S35390A_FLAG_POC | S35390A_FLAG_BLD)) { + /* Try up to five times to reset the chip */ + if (initcount < 5) { + ++initcount; + goto initialize; + } else + return -EIO; + } + + return 1; } static int s35390a_disable_test_mode(struct s35390a *s35390a) @@ -217,12 +266,12 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) alm->time.tm_min, alm->time.tm_hour, alm->time.tm_mday, alm->time.tm_mon, alm->time.tm_year, alm->time.tm_wday); - /* disable interrupt */ + /* disable interrupt (which deasserts the irq line) */ err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts)); if (err < 0) return err; - /* clear pending interrupt, if any */ + /* clear pending interrupt (in STATUS1 only), if any */ err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &sts, sizeof(sts)); if (err < 0) return err; @@ -242,6 +291,8 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (alm->time.tm_wday != -1) buf[S35390A_ALRM_BYTE_WDAY] = bin2bcd(alm->time.tm_wday) | 0x80; + else + buf[S35390A_ALRM_BYTE_WDAY] = 0; buf[S35390A_ALRM_BYTE_HOURS] = s35390a_hr2reg(s35390a, alm->time.tm_hour) | 0x80; @@ -269,23 +320,43 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (err < 0) return err; - if (bitrev8(sts) != S35390A_INT2_MODE_ALARM) - return -EINVAL; + if ((bitrev8(sts) & S35390A_INT2_MODE_MASK) != S35390A_INT2_MODE_ALARM) { + /* + * When the alarm isn't enabled, the register to configure + * the alarm time isn't accessible. + */ + alm->enabled = 0; + return 0; + } else { + alm->enabled = 1; + } err = s35390a_get_reg(s35390a, S35390A_CMD_INT2_REG1, buf, sizeof(buf)); if (err < 0) return err; /* This chip returns the bits of each byte in reverse order */ - for (i = 0; i < 3; ++i) { + for (i = 0; i < 3; ++i) buf[i] = bitrev8(buf[i]); - buf[i] &= ~0x80; - } - alm->time.tm_wday = bcd2bin(buf[S35390A_ALRM_BYTE_WDAY]); - alm->time.tm_hour = s35390a_reg2hr(s35390a, - buf[S35390A_ALRM_BYTE_HOURS]); - alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS]); + /* + * B0 of the three matching registers is an enable flag. Iff it is set + * the configured value is used for matching. + */ + if (buf[S35390A_ALRM_BYTE_WDAY] & 0x80) + alm->time.tm_wday = + bcd2bin(buf[S35390A_ALRM_BYTE_WDAY] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_HOURS] & 0x80) + alm->time.tm_hour = + s35390a_reg2hr(s35390a, + buf[S35390A_ALRM_BYTE_HOURS] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_MINS] & 0x80) + alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS] & ~0x80); + + /* alarm triggers always at s=0 */ + alm->time.tm_sec = 0; dev_dbg(&client->dev, "%s: alm is mins=%d, hours=%d, wday=%d\n", __func__, alm->time.tm_min, alm->time.tm_hour, @@ -327,11 +398,11 @@ static struct i2c_driver s35390a_driver; static int s35390a_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int err; + int err, err_reset; unsigned int i; struct s35390a *s35390a; struct rtc_time tm; - char buf[1]; + char buf, status1; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { err = -ENODEV; @@ -360,29 +431,35 @@ static int s35390a_probe(struct i2c_client *client, } } - err = s35390a_reset(s35390a); - if (err < 0) { + err_reset = s35390a_reset(s35390a, &status1); + if (err_reset < 0) { + err = err_reset; dev_err(&client->dev, "error resetting chip\n"); goto exit_dummy; } - err = s35390a_disable_test_mode(s35390a); - if (err < 0) { - dev_err(&client->dev, "error disabling test mode\n"); - goto exit_dummy; - } - - err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); - if (err < 0) { - dev_err(&client->dev, "error checking 12/24 hour mode\n"); - goto exit_dummy; - } - if (buf[0] & S35390A_FLAG_24H) + if (status1 & S35390A_FLAG_24H) s35390a->twentyfourhour = 1; else s35390a->twentyfourhour = 0; - if (s35390a_get_datetime(client, &tm) < 0) + if (status1 & S35390A_FLAG_INT2) { + /* disable alarm (and maybe test mode) */ + buf = 0; + err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1); + if (err < 0) { + dev_err(&client->dev, "error disabling alarm"); + goto exit_dummy; + } + } else { + err = s35390a_disable_test_mode(s35390a); + if (err < 0) { + dev_err(&client->dev, "error disabling test mode\n"); + goto exit_dummy; + } + } + + if (err_reset > 0 || s35390a_get_datetime(client, &tm) < 0) dev_warn(&client->dev, "clock needs to be set\n"); device_set_wakeup_capable(&client->dev, 1); @@ -395,6 +472,10 @@ static int s35390a_probe(struct i2c_client *client, err = PTR_ERR(s35390a->rtc); goto exit_dummy; } + + if (status1 & S35390A_FLAG_INT2) + rtc_update_irq(s35390a->rtc, 1, RTC_AF); + return 0; exit_dummy: diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d01ad7e8078e..d44fb34df8fe 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -149,12 +149,14 @@ static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq) if (!is_power_of_2(freq)) return -EINVAL; + s3c_rtc_enable_clk(info); spin_lock_irq(&info->pie_lock); if (info->data->set_freq) info->data->set_freq(info, freq); spin_unlock_irq(&info->pie_lock); + s3c_rtc_disable_clk(info); return 0; } @@ -264,35 +266,23 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) /* decode the alarm enable field */ if (alm_en & S3C2410_RTCALM_SECEN) alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec); - else - alm_tm->tm_sec = -1; if (alm_en & S3C2410_RTCALM_MINEN) alm_tm->tm_min = bcd2bin(alm_tm->tm_min); - else - alm_tm->tm_min = -1; if (alm_en & S3C2410_RTCALM_HOUREN) alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour); - else - alm_tm->tm_hour = -1; if (alm_en & S3C2410_RTCALM_DAYEN) alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday); - else - alm_tm->tm_mday = -1; if (alm_en & S3C2410_RTCALM_MONEN) { alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon); alm_tm->tm_mon -= 1; - } else { - alm_tm->tm_mon = -1; } if (alm_en & S3C2410_RTCALM_YEAREN) alm_tm->tm_year = bcd2bin(alm_tm->tm_year); - else - alm_tm->tm_year = -1; return 0; } @@ -577,8 +567,6 @@ static int s3c_rtc_probe(struct platform_device *pdev) s3c_rtc_setfreq(info, 1); - s3c_rtc_disable_clk(info); - return 0; err_nortc: diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index a45845a571e5..17b6235d67a5 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -481,7 +481,6 @@ static int sh_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) tm->tm_mon = sh_rtc_read_alarm_value(rtc, RMONAR); if (tm->tm_mon > 0) tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */ - tm->tm_year = 0xffff; wkalrm->enabled = (readb(rtc->regbase + RCR1) & RCR1_AIE) ? 1 : 0; @@ -500,52 +499,13 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc, writeb(bin2bcd(value) | AR_ENB, rtc->regbase + reg_off); } -static int sh_rtc_check_alarm(struct rtc_time *tm) -{ - /* - * The original rtc says anything > 0xc0 is "don't care" or "match - * all" - most users use 0xff but rtc-dev uses -1 for the same thing. - * The original rtc doesn't support years - some things use -1 and - * some 0xffff. We use -1 to make out tests easier. - */ - if (tm->tm_year == 0xffff) - tm->tm_year = -1; - if (tm->tm_mon >= 0xff) - tm->tm_mon = -1; - if (tm->tm_mday >= 0xff) - tm->tm_mday = -1; - if (tm->tm_wday >= 0xff) - tm->tm_wday = -1; - if (tm->tm_hour >= 0xff) - tm->tm_hour = -1; - if (tm->tm_min >= 0xff) - tm->tm_min = -1; - if (tm->tm_sec >= 0xff) - tm->tm_sec = -1; - - if (tm->tm_year > 9999 || - tm->tm_mon >= 12 || - tm->tm_mday == 0 || tm->tm_mday >= 32 || - tm->tm_wday >= 7 || - tm->tm_hour >= 24 || - tm->tm_min >= 60 || - tm->tm_sec >= 60) - return -EINVAL; - - return 0; -} - static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) { struct platform_device *pdev = to_platform_device(dev); struct sh_rtc *rtc = platform_get_drvdata(pdev); unsigned int rcr1; struct rtc_time *tm = &wkalrm->time; - int mon, err; - - err = sh_rtc_check_alarm(tm); - if (unlikely(err < 0)) - return err; + int mon; spin_lock_irq(&rtc->lock); diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 60232bd366ef..15ac597d54da 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -179,12 +179,6 @@ static int tegra_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) if (sec == 0) { /* alarm is disabled. */ alarm->enabled = 0; - alarm->time.tm_mon = -1; - alarm->time.tm_mday = -1; - alarm->time.tm_year = -1; - alarm->time.tm_hour = -1; - alarm->time.tm_min = -1; - alarm->time.tm_sec = -1; } else { /* alarm is enabled. */ alarm->enabled = 1; diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index 7a0436329d6c..1f3117b5a83c 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -25,7 +25,7 @@ #include <linux/rtc.h> #include <linux/types.h> #include <linux/bcd.h> -#include <linux/rtc-v3020.h> +#include <linux/platform_data/rtc-v3020.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/slab.h> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 1918f5483b23..7d1b4317eccc 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -838,6 +838,23 @@ config SCSI_IBMVSCSI To compile this driver as a module, choose M here: the module will be called ibmvscsi. +config SCSI_IBMVSCSIS + tristate "IBM Virtual SCSI Server support" + depends on PPC_PSERIES && TARGET_CORE && SCSI && PCI + help + This is the IBM POWER Virtual SCSI Target Server + This driver uses the SRP protocol for communication betwen servers + guest and/or the host that run on the same server. + More information on VSCSI protocol can be found at www.power.org + + The userspace configuration needed to initialize the driver can be + be found here: + + https://github.com/powervm/ibmvscsis/wiki/Configuration + + To compile this driver as a module, choose M here: the + module will be called ibmvscsis. + config SCSI_IBMVFC tristate "IBM Virtual FC support" depends on PPC_PSERIES && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 862ab4efad61..d5397987e731 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -128,6 +128,7 @@ obj-$(CONFIG_SCSI_SNI_53C710) += 53c700.o sni_53c710.o obj-$(CONFIG_SCSI_NSP32) += nsp32.o obj-$(CONFIG_SCSI_IPR) += ipr.o obj-$(CONFIG_SCSI_IBMVSCSI) += ibmvscsi/ +obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsi_tgt/ obj-$(CONFIG_SCSI_IBMVFC) += ibmvscsi/ obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o obj-$(CONFIG_SCSI_STEX) += stex.o diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 860008d42466..661bb94e2548 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -778,7 +778,7 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) { struct afu *afu = cfg->afu; struct device *dev = &cfg->dev->dev; - struct sisl_global_map __iomem *global = &afu->afu_map->global; + struct sisl_global_map __iomem *global; struct dev_dependent_vals *ddv; u64 reg, status; int i, retry_cnt = 0; @@ -787,6 +787,14 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) if (!(ddv->flags & CXLFLASH_NOTIFY_SHUTDOWN)) return; + if (!afu || !afu->afu_map) { + dev_dbg(dev, "%s: The problem state area is not mapped\n", + __func__); + return; + } + + global = &afu->afu_map->global; + /* Notify AFU */ for (i = 0; i < NUM_FC_PORTS; i++) { reg = readq_be(&global->fc_regs[i][FC_CONFIG2 / 8]); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index c8a4305c7662..9bd41a35a78a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -92,6 +92,8 @@ static struct fcoe_interface static int fcoe_fip_recv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +static int fcoe_fip_vlan_recv(struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *); static void fcoe_fip_send(struct fcoe_ctlr *, struct sk_buff *); static void fcoe_update_src_mac(struct fc_lport *, u8 *); @@ -363,6 +365,12 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, fcoe->fip_packet_type.dev = netdev; dev_add_pack(&fcoe->fip_packet_type); + if (netdev != real_dev) { + fcoe->fip_vlan_packet_type.func = fcoe_fip_vlan_recv; + fcoe->fip_vlan_packet_type.type = htons(ETH_P_FIP); + fcoe->fip_vlan_packet_type.dev = real_dev; + dev_add_pack(&fcoe->fip_vlan_packet_type); + } return 0; } @@ -450,6 +458,8 @@ static void fcoe_interface_remove(struct fcoe_interface *fcoe) */ __dev_remove_pack(&fcoe->fcoe_packet_type); __dev_remove_pack(&fcoe->fip_packet_type); + if (netdev != fcoe->realdev) + __dev_remove_pack(&fcoe->fip_vlan_packet_type); synchronize_net(); /* Delete secondary MAC addresses */ @@ -520,6 +530,29 @@ static int fcoe_fip_recv(struct sk_buff *skb, struct net_device *netdev, } /** + * fcoe_fip_vlan_recv() - Handler for received FIP VLAN discovery frames + * @skb: The receive skb + * @netdev: The associated net device + * @ptype: The packet_type structure which was used to register this handler + * @orig_dev: The original net_device the the skb was received on. + * (in case dev is a bond) + * + * Returns: 0 for success + */ +static int fcoe_fip_vlan_recv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *ptype, + struct net_device *orig_dev) +{ + struct fcoe_interface *fcoe; + struct fcoe_ctlr *ctlr; + + fcoe = container_of(ptype, struct fcoe_interface, fip_vlan_packet_type); + ctlr = fcoe_to_ctlr(fcoe); + fcoe_ctlr_recv(ctlr, skb); + return 0; +} + +/** * fcoe_port_send() - Send an Ethernet-encapsulated FIP/FCoE frame * @port: The FCoE port * @skb: The FIP/FCoE packet to be sent @@ -539,7 +572,21 @@ static void fcoe_port_send(struct fcoe_port *port, struct sk_buff *skb) */ static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) { - skb->dev = fcoe_from_ctlr(fip)->netdev; + struct fcoe_interface *fcoe = fcoe_from_ctlr(fip); + struct fip_frame { + struct ethhdr eth; + struct fip_header fip; + } __packed *frame; + + /* + * Use default VLAN for FIP VLAN discovery protocol + */ + frame = (struct fip_frame *)skb->data; + if (frame->fip.fip_op == ntohs(FIP_OP_VLAN) && + fcoe->realdev != fcoe->netdev) + skb->dev = fcoe->realdev; + else + skb->dev = fcoe->netdev; fcoe_port_send(lport_priv(fip->lp), skb); } @@ -2448,7 +2495,7 @@ static int __init fcoe_init(void) if (rc) { printk(KERN_ERR "failed to register an fcoe transport, check " "if libfcoe is loaded\n"); - return rc; + goto out_destroy; } mutex_lock(&fcoe_config_mutex); @@ -2471,6 +2518,7 @@ static int __init fcoe_init(void) out_free: mutex_unlock(&fcoe_config_mutex); +out_destroy: destroy_workqueue(fcoe_wq); return rc; } diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index 2b53672bf932..6aa4820f6cc0 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -80,6 +80,7 @@ struct fcoe_interface { struct net_device *realdev; struct packet_type fcoe_packet_type; struct packet_type fip_packet_type; + struct packet_type fip_vlan_packet_type; struct fc_exch_mgr *oem; u8 removed; u8 priority; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 8fae03215a85..5c70a52ad346 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -26,7 +26,7 @@ #include <linux/list.h> #include <linux/types.h> -#include "viosrp.h" +#include <scsi/viosrp.h> #define IBMVFC_NAME "ibmvfc" #define IBMVFC_DRIVER_VERSION "1.0.11" diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h index 1067367395cd..e0f6c3aeb4ee 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.h +++ b/drivers/scsi/ibmvscsi/ibmvscsi.h @@ -33,7 +33,7 @@ #include <linux/list.h> #include <linux/completion.h> #include <linux/interrupt.h> -#include "viosrp.h" +#include <scsi/viosrp.h> struct scsi_cmnd; struct Scsi_Host; diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h deleted file mode 100644 index c1ab8a4c3161..000000000000 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ /dev/null @@ -1,225 +0,0 @@ -/*****************************************************************************/ -/* srp.h -- SCSI RDMA Protocol definitions */ -/* */ -/* Written By: Colin Devilbis, IBM Corporation */ -/* */ -/* Copyright (C) 2003 IBM Corporation */ -/* */ -/* This program is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation; either version 2 of the License, or */ -/* (at your option) any later version. */ -/* */ -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU General Public License */ -/* along with this program; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* */ -/* */ -/* This file contains structures and definitions for IBM RPA (RS/6000 */ -/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */ -/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */ -/* commands between logical partitions. */ -/* */ -/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */ -/* between partitions. The definitions in this file are architected, */ -/* and cannot be changed without breaking compatibility with other versions */ -/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/ -/* between logical partitions */ -/*****************************************************************************/ -#ifndef VIOSRP_H -#define VIOSRP_H -#include <scsi/srp.h> - -#define SRP_VERSION "16.a" -#define SRP_MAX_IU_LEN 256 -#define SRP_MAX_LOC_LEN 32 - -union srp_iu { - struct srp_login_req login_req; - struct srp_login_rsp login_rsp; - struct srp_login_rej login_rej; - struct srp_i_logout i_logout; - struct srp_t_logout t_logout; - struct srp_tsk_mgmt tsk_mgmt; - struct srp_cmd cmd; - struct srp_rsp rsp; - u8 reserved[SRP_MAX_IU_LEN]; -}; - -enum viosrp_crq_headers { - VIOSRP_CRQ_FREE = 0x00, - VIOSRP_CRQ_CMD_RSP = 0x80, - VIOSRP_CRQ_INIT_RSP = 0xC0, - VIOSRP_CRQ_XPORT_EVENT = 0xFF -}; - -enum viosrp_crq_init_formats { - VIOSRP_CRQ_INIT = 0x01, - VIOSRP_CRQ_INIT_COMPLETE = 0x02 -}; - -enum viosrp_crq_formats { - VIOSRP_SRP_FORMAT = 0x01, - VIOSRP_MAD_FORMAT = 0x02, - VIOSRP_OS400_FORMAT = 0x03, - VIOSRP_AIX_FORMAT = 0x04, - VIOSRP_LINUX_FORMAT = 0x05, - VIOSRP_INLINE_FORMAT = 0x06 -}; - -enum viosrp_crq_status { - VIOSRP_OK = 0x0, - VIOSRP_NONRECOVERABLE_ERR = 0x1, - VIOSRP_VIOLATES_MAX_XFER = 0x2, - VIOSRP_PARTNER_PANIC = 0x3, - VIOSRP_DEVICE_BUSY = 0x8, - VIOSRP_ADAPTER_FAIL = 0x10, - VIOSRP_OK2 = 0x99, -}; - -struct viosrp_crq { - u8 valid; /* used by RPA */ - u8 format; /* SCSI vs out-of-band */ - u8 reserved; - u8 status; /* non-scsi failure? (e.g. DMA failure) */ - __be16 timeout; /* in seconds */ - __be16 IU_length; /* in bytes */ - __be64 IU_data_ptr; /* the TCE for transferring data */ -}; - -/* MADs are Management requests above and beyond the IUs defined in the SRP - * standard. - */ -enum viosrp_mad_types { - VIOSRP_EMPTY_IU_TYPE = 0x01, - VIOSRP_ERROR_LOG_TYPE = 0x02, - VIOSRP_ADAPTER_INFO_TYPE = 0x03, - VIOSRP_CAPABILITIES_TYPE = 0x05, - VIOSRP_ENABLE_FAST_FAIL = 0x08, -}; - -enum viosrp_mad_status { - VIOSRP_MAD_SUCCESS = 0x00, - VIOSRP_MAD_NOT_SUPPORTED = 0xF1, - VIOSRP_MAD_FAILED = 0xF7, -}; - -enum viosrp_capability_type { - MIGRATION_CAPABILITIES = 0x01, - RESERVATION_CAPABILITIES = 0x02, -}; - -enum viosrp_capability_support { - SERVER_DOES_NOT_SUPPORTS_CAP = 0x0, - SERVER_SUPPORTS_CAP = 0x01, - SERVER_CAP_DATA = 0x02, -}; - -enum viosrp_reserve_type { - CLIENT_RESERVE_SCSI_2 = 0x01, -}; - -enum viosrp_capability_flag { - CLIENT_MIGRATED = 0x01, - CLIENT_RECONNECT = 0x02, - CAP_LIST_SUPPORTED = 0x04, - CAP_LIST_DATA = 0x08, -}; - -/* - * Common MAD header - */ -struct mad_common { - __be32 type; - __be16 status; - __be16 length; - __be64 tag; -}; - -/* - * All SRP (and MAD) requests normally flow from the - * client to the server. There is no way for the server to send - * an asynchronous message back to the client. The Empty IU is used - * to hang out a meaningless request to the server so that it can respond - * asynchrouously with something like a SCSI AER - */ -struct viosrp_empty_iu { - struct mad_common common; - __be64 buffer; - __be32 port; -}; - -struct viosrp_error_log { - struct mad_common common; - __be64 buffer; -}; - -struct viosrp_adapter_info { - struct mad_common common; - __be64 buffer; -}; - -struct viosrp_fast_fail { - struct mad_common common; -}; - -struct viosrp_capabilities { - struct mad_common common; - __be64 buffer; -}; - -struct mad_capability_common { - __be32 cap_type; - __be16 length; - __be16 server_support; -}; - -struct mad_reserve_cap { - struct mad_capability_common common; - __be32 type; -}; - -struct mad_migration_cap { - struct mad_capability_common common; - __be32 ecl; -}; - -struct capabilities{ - __be32 flags; - char name[SRP_MAX_LOC_LEN]; - char loc[SRP_MAX_LOC_LEN]; - struct mad_migration_cap migration; - struct mad_reserve_cap reserve; -}; - -union mad_iu { - struct viosrp_empty_iu empty_iu; - struct viosrp_error_log error_log; - struct viosrp_adapter_info adapter_info; - struct viosrp_fast_fail fast_fail; - struct viosrp_capabilities capabilities; -}; - -union viosrp_iu { - union srp_iu srp; - union mad_iu mad; -}; - -struct mad_adapter_info_data { - char srp_version[8]; - char partition_name[96]; - __be32 partition_number; -#define SRP_MAD_VERSION_1 1 - __be32 mad_version; -#define SRP_MAD_OS_LINUX 2 -#define SRP_MAD_OS_AIX 3 - __be32 os_type; - __be32 port_max_txu[8]; /* per-port maximum transfer */ -}; - -#endif diff --git a/drivers/scsi/ibmvscsi_tgt/Makefile b/drivers/scsi/ibmvscsi_tgt/Makefile new file mode 100644 index 000000000000..0c060ce64cb0 --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsis.o + +ibmvscsis-y := libsrp.o ibmvscsi_tgt.o diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c new file mode 100644 index 000000000000..b29fef9d0f27 --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -0,0 +1,4087 @@ +/******************************************************************************* + * IBM Virtual SCSI Target Driver + * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp. + * Santiago Leon (santil@us.ibm.com) IBM Corp. + * Linda Xie (lxie@us.ibm.com) IBM Corp. + * + * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org> + * + * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com> + * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ****************************************************************************/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/string.h> + +#include <target/target_core_base.h> +#include <target/target_core_fabric.h> + +#include <asm/hvcall.h> +#include <asm/vio.h> + +#include <scsi/viosrp.h> + +#include "ibmvscsi_tgt.h" + +#define IBMVSCSIS_VERSION "v0.2" + +#define INITIAL_SRP_LIMIT 800 +#define DEFAULT_MAX_SECTORS 256 + +static uint max_vdma_size = MAX_H_COPY_RDMA; + +static char system_id[SYS_ID_NAME_LEN] = ""; +static char partition_name[PARTITION_NAMELEN] = "UNKNOWN"; +static uint partition_number = -1; + +/* Adapter list and lock to control it */ +static DEFINE_SPINLOCK(ibmvscsis_dev_lock); +static LIST_HEAD(ibmvscsis_dev_list); + +static long ibmvscsis_parse_command(struct scsi_info *vscsi, + struct viosrp_crq *crq); + +static void ibmvscsis_adapter_idle(struct scsi_info *vscsi); + +static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, + struct srp_rsp *rsp) +{ + u32 residual_count = se_cmd->residual_count; + + if (!residual_count) + return; + + if (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (se_cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + rsp->flags = SRP_RSP_FLAG_DOUNDER; + rsp->data_out_res_cnt = cpu_to_be32(residual_count); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + rsp->flags = SRP_RSP_FLAG_DIUNDER; + rsp->data_in_res_cnt = cpu_to_be32(residual_count); + } + } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (se_cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + rsp->flags = SRP_RSP_FLAG_DOOVER; + rsp->data_out_res_cnt = cpu_to_be32(residual_count); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + rsp->flags = SRP_RSP_FLAG_DIOVER; + rsp->data_in_res_cnt = cpu_to_be32(residual_count); + } + } +} + +/** + * connection_broken() - Determine if the connection to the client is good + * @vscsi: Pointer to our adapter structure + * + * This function attempts to send a ping MAD to the client. If the call to + * queue the request returns H_CLOSED then the connection has been broken + * and the function returns TRUE. + * + * EXECUTION ENVIRONMENT: + * Interrupt or Process environment + */ +static bool connection_broken(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long h_return_code; + bool rc = false; + + /* create a PING crq */ + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_CMD_RESP_EL; + crq->format = MESSAGE_IN_CRQ; + crq->status = PING; + + h_return_code = h_send_crq(vscsi->dds.unit_id, + cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + pr_debug("connection_broken: rc %ld\n", h_return_code); + + if (h_return_code == H_CLOSED) + rc = true; + + return rc; +} + +/** + * ibmvscsis_unregister_command_q() - Helper Function-Unregister Command Queue + * @vscsi: Pointer to our adapter structure + * + * This function calls h_free_q then frees the interrupt bit etc. + * It must release the lock before doing so because of the time it can take + * for h_free_crq in PHYP + * NOTE: the caller must make sure that state and or flags will prevent + * interrupt handler from scheduling work. + * NOTE: anyone calling this function may need to set the CRQ_CLOSED flag + * we can't do it here, because we don't have the lock + * + * EXECUTION ENVIRONMENT: + * Process level + */ +static long ibmvscsis_unregister_command_q(struct scsi_info *vscsi) +{ + long qrc; + long rc = ADAPT_SUCCESS; + int ticks = 0; + + do { + qrc = h_free_crq(vscsi->dds.unit_id); + switch (qrc) { + case H_SUCCESS: + break; + + case H_HARDWARE: + case H_PARAMETER: + dev_err(&vscsi->dev, "unregister_command_q: error from h_free_crq %ld\n", + qrc); + rc = ERROR; + break; + + case H_BUSY: + case H_LONG_BUSY_ORDER_1_MSEC: + /* msleep not good for small values */ + usleep_range(1000, 2000); + ticks += 1; + break; + case H_LONG_BUSY_ORDER_10_MSEC: + usleep_range(10000, 20000); + ticks += 10; + break; + case H_LONG_BUSY_ORDER_100_MSEC: + msleep(100); + ticks += 100; + break; + case H_LONG_BUSY_ORDER_1_SEC: + ssleep(1); + ticks += 1000; + break; + case H_LONG_BUSY_ORDER_10_SEC: + ssleep(10); + ticks += 10000; + break; + case H_LONG_BUSY_ORDER_100_SEC: + ssleep(100); + ticks += 100000; + break; + default: + dev_err(&vscsi->dev, "unregister_command_q: unknown error %ld from h_free_crq\n", + qrc); + rc = ERROR; + break; + } + + /* + * dont wait more then 300 seconds + * ticks are in milliseconds more or less + */ + if (ticks > 300000 && qrc != H_SUCCESS) { + rc = ERROR; + dev_err(&vscsi->dev, "Excessive wait for h_free_crq\n"); + } + } while (qrc != H_SUCCESS && rc == ADAPT_SUCCESS); + + pr_debug("Freeing CRQ: phyp rc %ld, rc %ld\n", qrc, rc); + + return rc; +} + +/** + * ibmvscsis_delete_client_info() - Helper function to Delete Client Info + * @vscsi: Pointer to our adapter structure + * @client_closed: True if client closed its queue + * + * Deletes information specific to the client when the client goes away + * + * EXECUTION ENVIRONMENT: + * Interrupt or Process + */ +static void ibmvscsis_delete_client_info(struct scsi_info *vscsi, + bool client_closed) +{ + vscsi->client_cap = 0; + + /* + * Some things we don't want to clear if we're closing the queue, + * because some clients don't resend the host handshake when they + * get a transport event. + */ + if (client_closed) + vscsi->client_data.os_type = 0; +} + +/** + * ibmvscsis_free_command_q() - Free Command Queue + * @vscsi: Pointer to our adapter structure + * + * This function calls unregister_command_q, then clears interrupts and + * any pending interrupt acknowledgments associated with the command q. + * It also clears memory if there is no error. + * + * PHYP did not meet the PAPR architecture so that we must give up the + * lock. This causes a timing hole regarding state change. To close the + * hole this routine does accounting on any change that occurred during + * the time the lock is not held. + * NOTE: must give up and then acquire the interrupt lock, the caller must + * make sure that state and or flags will prevent interrupt handler from + * scheduling work. + * + * EXECUTION ENVIRONMENT: + * Process level, interrupt lock is held + */ +static long ibmvscsis_free_command_q(struct scsi_info *vscsi) +{ + int bytes; + u32 flags_under_lock; + u16 state_under_lock; + long rc = ADAPT_SUCCESS; + + if (!(vscsi->flags & CRQ_CLOSED)) { + vio_disable_interrupts(vscsi->dma_dev); + + state_under_lock = vscsi->new_state; + flags_under_lock = vscsi->flags; + vscsi->phyp_acr_state = 0; + vscsi->phyp_acr_flags = 0; + + spin_unlock_bh(&vscsi->intr_lock); + rc = ibmvscsis_unregister_command_q(vscsi); + spin_lock_bh(&vscsi->intr_lock); + + if (state_under_lock != vscsi->new_state) + vscsi->phyp_acr_state = vscsi->new_state; + + vscsi->phyp_acr_flags = ((~flags_under_lock) & vscsi->flags); + + if (rc == ADAPT_SUCCESS) { + bytes = vscsi->cmd_q.size * PAGE_SIZE; + memset(vscsi->cmd_q.base_addr, 0, bytes); + vscsi->cmd_q.index = 0; + vscsi->flags |= CRQ_CLOSED; + + ibmvscsis_delete_client_info(vscsi, false); + } + + pr_debug("free_command_q: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n", + vscsi->flags, vscsi->state, vscsi->phyp_acr_flags, + vscsi->phyp_acr_state); + } + return rc; +} + +/** + * ibmvscsis_cmd_q_dequeue() - Get valid Command element + * @mask: Mask to use in case index wraps + * @current_index: Current index into command queue + * @base_addr: Pointer to start of command queue + * + * Returns a pointer to a valid command element or NULL, if the command + * queue is empty + * + * EXECUTION ENVIRONMENT: + * Interrupt environment, interrupt lock held + */ +static struct viosrp_crq *ibmvscsis_cmd_q_dequeue(uint mask, + uint *current_index, + struct viosrp_crq *base_addr) +{ + struct viosrp_crq *ptr; + + ptr = base_addr + *current_index; + + if (ptr->valid) { + *current_index = (*current_index + 1) & mask; + dma_rmb(); + } else { + ptr = NULL; + } + + return ptr; +} + +/** + * ibmvscsis_send_init_message() - send initialize message to the client + * @vscsi: Pointer to our adapter structure + * @format: Which Init Message format to send + * + * EXECUTION ENVIRONMENT: + * Interrupt environment interrupt lock held + */ +static long ibmvscsis_send_init_message(struct scsi_info *vscsi, u8 format) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long rc; + + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_INIT_MSG; + crq->format = format; + rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + return rc; +} + +/** + * ibmvscsis_check_init_msg() - Check init message valid + * @vscsi: Pointer to our adapter structure + * @format: Pointer to return format of Init Message, if any. + * Set to UNUSED_FORMAT if no Init Message in queue. + * + * Checks if an initialize message was queued by the initiatior + * after the queue was created and before the interrupt was enabled. + * + * EXECUTION ENVIRONMENT: + * Process level only, interrupt lock held + */ +static long ibmvscsis_check_init_msg(struct scsi_info *vscsi, uint *format) +{ + struct viosrp_crq *crq; + long rc = ADAPT_SUCCESS; + + crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask, &vscsi->cmd_q.index, + vscsi->cmd_q.base_addr); + if (!crq) { + *format = (uint)UNUSED_FORMAT; + } else if (crq->valid == VALID_INIT_MSG && crq->format == INIT_MSG) { + *format = (uint)INIT_MSG; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + + /* + * the caller has ensured no initialize message was + * sent after the queue was + * created so there should be no other message on the queue. + */ + crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask, + &vscsi->cmd_q.index, + vscsi->cmd_q.base_addr); + if (crq) { + *format = (uint)(crq->format); + rc = ERROR; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + } + } else { + *format = (uint)(crq->format); + rc = ERROR; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + } + + return rc; +} + +/** + * ibmvscsis_establish_new_q() - Establish new CRQ queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state being established after resetting the queue + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) +{ + long rc = ADAPT_SUCCESS; + uint format; + + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + + rc = vio_enable_interrupts(vscsi->dma_dev); + if (rc) { + pr_warn("reset_queue: failed to enable interrupts, rc %ld\n", + rc); + return rc; + } + + rc = ibmvscsis_check_init_msg(vscsi, &format); + if (rc) { + dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n", + rc); + return rc; + } + + if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) { + rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); + switch (rc) { + case H_SUCCESS: + case H_DROPPED: + case H_CLOSED: + rc = ADAPT_SUCCESS; + break; + + case H_PARAMETER: + case H_HARDWARE: + break; + + default: + vscsi->state = UNDEFINED; + rc = H_HARDWARE; + break; + } + } + + return rc; +} + +/** + * ibmvscsis_reset_queue() - Reset CRQ Queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state to establish after resetting the queue + * + * This function calls h_free_q and then calls h_reg_q and does all + * of the bookkeeping to get us back to where we can communicate. + * + * Actually, we don't always call h_free_crq. A problem was discovered + * where one partition would close and reopen his queue, which would + * cause his partner to get a transport event, which would cause him to + * close and reopen his queue, which would cause the original partition + * to get a transport event, etc., etc. To prevent this, we don't + * actually close our queue if the client initiated the reset, (i.e. + * either we got a transport event or we have detected that the client's + * queue is gone) + * + * EXECUTION ENVIRONMENT: + * Process environment, called with interrupt lock held + */ +static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) +{ + int bytes; + long rc = ADAPT_SUCCESS; + + pr_debug("reset_queue: flags 0x%x\n", vscsi->flags); + + /* don't reset, the client did it for us */ + if (vscsi->flags & (CLIENT_FAILED | TRANS_EVENT)) { + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + vscsi->state = new_state; + vio_enable_interrupts(vscsi->dma_dev); + } else { + rc = ibmvscsis_free_command_q(vscsi); + if (rc == ADAPT_SUCCESS) { + vscsi->state = new_state; + + bytes = vscsi->cmd_q.size * PAGE_SIZE; + rc = h_reg_crq(vscsi->dds.unit_id, + vscsi->cmd_q.crq_token, bytes); + if (rc == H_CLOSED || rc == H_SUCCESS) { + rc = ibmvscsis_establish_new_q(vscsi, + new_state); + } + + if (rc != ADAPT_SUCCESS) { + pr_debug("reset_queue: reg_crq rc %ld\n", rc); + + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + ibmvscsis_free_command_q(vscsi); + } + } else { + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + } + } +} + +/** + * ibmvscsis_free_cmd_resources() - Free command resources + * @vscsi: Pointer to our adapter structure + * @cmd: Command which is not longer in use + * + * Must be called with interrupt lock held. + */ +static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + + switch (cmd->type) { + case TASK_MANAGEMENT: + case SCSI_CDB: + /* + * When the queue goes down this value is cleared, so it + * cannot be cleared in this general purpose function. + */ + if (vscsi->debit) + vscsi->debit -= 1; + break; + case ADAPTER_MAD: + vscsi->flags &= ~PROCESSING_MAD; + break; + case UNSET_TYPE: + break; + default: + dev_err(&vscsi->dev, "free_cmd_resources unknown type %d\n", + cmd->type); + break; + } + + cmd->iue = NULL; + list_add_tail(&cmd->list, &vscsi->free_cmd); + srp_iu_put(iue); + + if (list_empty(&vscsi->active_q) && list_empty(&vscsi->schedule_q) && + list_empty(&vscsi->waiting_rsp) && (vscsi->flags & WAIT_FOR_IDLE)) { + vscsi->flags &= ~WAIT_FOR_IDLE; + complete(&vscsi->wait_idle); + } +} + +/** + * ibmvscsis_disconnect() - Helper function to disconnect + * @work: Pointer to work_struct, gives access to our adapter structure + * + * An error has occurred or the driver received a Transport event, + * and the driver is requesting that the command queue be de-registered + * in a safe manner. If there is no outstanding I/O then we can stop the + * queue. If we are restarting the queue it will be reflected in the + * the state of the adapter. + * + * EXECUTION ENVIRONMENT: + * Process environment + */ +static void ibmvscsis_disconnect(struct work_struct *work) +{ + struct scsi_info *vscsi = container_of(work, struct scsi_info, + proc_work); + u16 new_state; + bool wait_idle = false; + long rc = ADAPT_SUCCESS; + + spin_lock_bh(&vscsi->intr_lock); + new_state = vscsi->new_state; + vscsi->new_state = 0; + + pr_debug("disconnect: flags 0x%x, state 0x%hx\n", vscsi->flags, + vscsi->state); + + /* + * check which state we are in and see if we + * should transitition to the new state + */ + switch (vscsi->state) { + /* Should never be called while in this state. */ + case NO_QUEUE: + /* + * Can never transition from this state; + * igonore errors and logout. + */ + case UNCONFIGURING: + break; + + /* can transition from this state to UNCONFIGURING */ + case ERR_DISCONNECT: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + + /* + * Can transition from this state to to unconfiguring + * or err disconnect. + */ + case ERR_DISCONNECT_RECONNECT: + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + vscsi->state = new_state; + break; + + case WAIT_IDLE: + break; + default: + break; + } + break; + + /* can transition from this state to UNCONFIGURING */ + case ERR_DISCONNECTED: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + + /* + * If this is a transition into an error state. + * a client is attempting to establish a connection + * and has violated the RPA protocol. + * There can be nothing pending on the adapter although + * there can be requests in the command queue. + */ + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + switch (new_state) { + case ERR_DISCONNECT: + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->state = new_state; + vscsi->flags &= ~(SCHEDULE_DISCONNECT | + DISCONNECT_SCHEDULED); + ibmvscsis_free_command_q(vscsi); + break; + case ERR_DISCONNECT_RECONNECT: + ibmvscsis_reset_queue(vscsi, WAIT_ENABLED); + break; + + /* should never happen */ + case WAIT_IDLE: + rc = ERROR; + dev_err(&vscsi->dev, "disconnect: invalid state %d for WAIT_IDLE\n", + vscsi->state); + break; + } + break; + + case WAIT_IDLE: + switch (new_state) { + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + vscsi->state = new_state; + break; + } + break; + + /* + * Initiator has not done a successful srp login + * or has done a successful srp logout ( adapter was not + * busy). In the first case there can be responses queued + * waiting for space on the initiators response queue (MAD) + * The second case the adapter is idle. Assume the worse case, + * i.e. the second case. + */ + case WAIT_CONNECTION: + case CONNECTED: + case SRP_PROCESSING: + wait_idle = true; + vscsi->state = new_state; + break; + + /* can transition from this state to UNCONFIGURING */ + case UNDEFINED: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + default: + break; + } + + if (wait_idle) { + pr_debug("disconnect start wait, active %d, sched %d\n", + (int)list_empty(&vscsi->active_q), + (int)list_empty(&vscsi->schedule_q)); + if (!list_empty(&vscsi->active_q) || + !list_empty(&vscsi->schedule_q)) { + vscsi->flags |= WAIT_FOR_IDLE; + pr_debug("disconnect flags 0x%x\n", vscsi->flags); + /* + * This routine is can not be called with the interrupt + * lock held. + */ + spin_unlock_bh(&vscsi->intr_lock); + wait_for_completion(&vscsi->wait_idle); + spin_lock_bh(&vscsi->intr_lock); + } + pr_debug("disconnect stop wait\n"); + + ibmvscsis_adapter_idle(vscsi); + } + + spin_unlock_bh(&vscsi->intr_lock); +} + +/** + * ibmvscsis_post_disconnect() - Schedule the disconnect + * @vscsi: Pointer to our adapter structure + * @new_state: State to move to after disconnecting + * @flag_bits: Flags to turn on in adapter structure + * + * If it's already been scheduled, then see if we need to "upgrade" + * the new state (if the one passed in is more "severe" than the + * previous one). + * + * PRECONDITION: + * interrupt lock is held + */ +static void ibmvscsis_post_disconnect(struct scsi_info *vscsi, uint new_state, + uint flag_bits) +{ + uint state; + + /* check the validity of the new state */ + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case WAIT_IDLE: + break; + + default: + dev_err(&vscsi->dev, "post_disconnect: Invalid new state %d\n", + new_state); + return; + } + + vscsi->flags |= flag_bits; + + pr_debug("post_disconnect: new_state 0x%x, flag_bits 0x%x, vscsi->flags 0x%x, state %hx\n", + new_state, flag_bits, vscsi->flags, vscsi->state); + + if (!(vscsi->flags & (DISCONNECT_SCHEDULED | SCHEDULE_DISCONNECT))) { + vscsi->flags |= SCHEDULE_DISCONNECT; + vscsi->new_state = new_state; + + INIT_WORK(&vscsi->proc_work, ibmvscsis_disconnect); + (void)queue_work(vscsi->work_q, &vscsi->proc_work); + } else { + if (vscsi->new_state) + state = vscsi->new_state; + else + state = vscsi->state; + + switch (state) { + case NO_QUEUE: + case UNCONFIGURING: + break; + + case ERR_DISCONNECTED: + case ERR_DISCONNECT: + case UNDEFINED: + if (new_state == UNCONFIGURING) + vscsi->new_state = new_state; + break; + + case ERR_DISCONNECT_RECONNECT: + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + vscsi->new_state = new_state; + break; + default: + break; + } + break; + + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + case WAIT_IDLE: + case WAIT_CONNECTION: + case CONNECTED: + case SRP_PROCESSING: + vscsi->new_state = new_state; + break; + + default: + break; + } + } + + pr_debug("Leaving post_disconnect: flags 0x%x, new_state 0x%x\n", + vscsi->flags, vscsi->new_state); +} + +/** + * ibmvscsis_trans_event() - Handle a Transport Event + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ entry containing the Transport Event + * + * Do the logic to close the I_T nexus. This function may not + * behave to specification. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_trans_event(struct scsi_info *vscsi, + struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + pr_debug("trans_event: format %d, flags 0x%x, state 0x%hx\n", + (int)crq->format, vscsi->flags, vscsi->state); + + switch (crq->format) { + case MIGRATED: + case PARTNER_FAILED: + case PARTNER_DEREGISTER: + ibmvscsis_delete_client_info(vscsi, true); + break; + + default: + rc = ERROR; + dev_err(&vscsi->dev, "trans_event: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, + RESPONSE_Q_DOWN); + break; + } + + if (rc == ADAPT_SUCCESS) { + switch (vscsi->state) { + case NO_QUEUE: + case ERR_DISCONNECTED: + case UNDEFINED: + break; + + case UNCONFIGURING: + vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT); + break; + + case WAIT_ENABLED: + break; + + case WAIT_CONNECTION: + break; + + case CONNECTED: + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, + (RESPONSE_Q_DOWN | + TRANS_EVENT)); + break; + + case PART_UP_WAIT_ENAB: + vscsi->state = WAIT_ENABLED; + break; + + case SRP_PROCESSING: + if ((vscsi->debit > 0) || + !list_empty(&vscsi->schedule_q) || + !list_empty(&vscsi->waiting_rsp) || + !list_empty(&vscsi->active_q)) { + pr_debug("debit %d, sched %d, wait %d, active %d\n", + vscsi->debit, + (int)list_empty(&vscsi->schedule_q), + (int)list_empty(&vscsi->waiting_rsp), + (int)list_empty(&vscsi->active_q)); + pr_warn("connection lost with outstanding work\n"); + } else { + pr_debug("trans_event: SRP Processing, but no outstanding work\n"); + } + + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, + (RESPONSE_Q_DOWN | + TRANS_EVENT)); + break; + + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case WAIT_IDLE: + vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT); + break; + } + } + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + + pr_debug("Leaving trans_event: flags 0x%x, state 0x%hx, rc %ld\n", + vscsi->flags, vscsi->state, rc); + + return rc; +} + +/** + * ibmvscsis_poll_cmd_q() - Poll Command Queue + * @vscsi: Pointer to our adapter structure + * + * Called to handle command elements that may have arrived while + * interrupts were disabled. + * + * EXECUTION ENVIRONMENT: + * intr_lock must be held + */ +static void ibmvscsis_poll_cmd_q(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + long rc; + bool ack = true; + volatile u8 valid; + + pr_debug("poll_cmd_q: flags 0x%x, state 0x%hx, q index %ud\n", + vscsi->flags, vscsi->state, vscsi->cmd_q.index); + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + + while (valid) { +poll_work: + vscsi->cmd_q.index = + (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask; + + if (!rc) { + rc = ibmvscsis_parse_command(vscsi, crq); + } else { + if ((uint)crq->valid == VALID_TRANS_EVENT) { + /* + * must service the transport layer events even + * in an error state, dont break out until all + * the consecutive transport events have been + * processed + */ + rc = ibmvscsis_trans_event(vscsi, crq); + } else if (vscsi->flags & TRANS_EVENT) { + /* + * if a tranport event has occurred leave + * everything but transport events on the queue + */ + pr_debug("poll_cmd_q, ignoring\n"); + + /* + * need to decrement the queue index so we can + * look at the elment again + */ + if (vscsi->cmd_q.index) + vscsi->cmd_q.index -= 1; + else + /* + * index is at 0 it just wrapped. + * have it index last element in q + */ + vscsi->cmd_q.index = vscsi->cmd_q.mask; + break; + } + } + + crq->valid = INVALIDATE_CMD_RESP_EL; + + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + } + + if (!rc) { + if (ack) { + vio_enable_interrupts(vscsi->dma_dev); + ack = false; + pr_debug("poll_cmd_q, reenabling interrupts\n"); + } + valid = crq->valid; + dma_rmb(); + if (valid) + goto poll_work; + } + + pr_debug("Leaving poll_cmd_q: rc %ld\n", rc); +} + +/** + * ibmvscsis_free_cmd_qs() - Free elements in queue + * @vscsi: Pointer to our adapter structure + * + * Free all of the elements on all queues that are waiting for + * whatever reason. + * + * PRECONDITION: + * Called with interrupt lock held + */ +static void ibmvscsis_free_cmd_qs(struct scsi_info *vscsi) +{ + struct ibmvscsis_cmd *cmd, *nxt; + + pr_debug("free_cmd_qs: waiting_rsp empty %d, timer starter %d\n", + (int)list_empty(&vscsi->waiting_rsp), + vscsi->rsp_q_timer.started); + + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { + list_del(&cmd->list); + ibmvscsis_free_cmd_resources(vscsi, cmd); + } +} + +/** + * ibmvscsis_get_free_cmd() - Get free command from list + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) +{ + struct ibmvscsis_cmd *cmd = NULL; + struct iu_entry *iue; + + iue = srp_iu_get(&vscsi->target); + if (iue) { + cmd = list_first_entry_or_null(&vscsi->free_cmd, + struct ibmvscsis_cmd, list); + if (cmd) { + list_del(&cmd->list); + cmd->iue = iue; + cmd->type = UNSET_TYPE; + memset(&cmd->se_cmd, 0, sizeof(cmd->se_cmd)); + } else { + srp_iu_put(iue); + } + } + + return cmd; +} + +/** + * ibmvscsis_adapter_idle() - Helper function to handle idle adapter + * @vscsi: Pointer to our adapter structure + * + * This function is called when the adapter is idle when the driver + * is attempting to clear an error condition. + * The adapter is considered busy if any of its cmd queues + * are non-empty. This function can be invoked + * from the off level disconnect function. + * + * EXECUTION ENVIRONMENT: + * Process environment called with interrupt lock held + */ +static void ibmvscsis_adapter_idle(struct scsi_info *vscsi) +{ + int free_qs = false; + + pr_debug("adapter_idle: flags 0x%x, state 0x%hx\n", vscsi->flags, + vscsi->state); + + /* Only need to free qs if we're disconnecting from client */ + if (vscsi->state != WAIT_CONNECTION || vscsi->flags & TRANS_EVENT) + free_qs = true; + + switch (vscsi->state) { + case ERR_DISCONNECT_RECONNECT: + ibmvscsis_reset_queue(vscsi, WAIT_CONNECTION); + pr_debug("adapter_idle, disc_rec: flags 0x%x\n", vscsi->flags); + break; + + case ERR_DISCONNECT: + ibmvscsis_free_command_q(vscsi); + vscsi->flags &= ~DISCONNECT_SCHEDULED; + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->state = ERR_DISCONNECTED; + pr_debug("adapter_idle, disc: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + break; + + case WAIT_IDLE: + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + if (vscsi->flags & TRANS_EVENT) { + vscsi->state = WAIT_CONNECTION; + vscsi->flags &= PRESERVE_FLAG_FIELDS; + } else { + vscsi->state = CONNECTED; + vscsi->flags &= ~DISCONNECT_SCHEDULED; + } + + pr_debug("adapter_idle, wait: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + ibmvscsis_poll_cmd_q(vscsi); + break; + + case ERR_DISCONNECTED: + vscsi->flags &= ~DISCONNECT_SCHEDULED; + pr_debug("adapter_idle, disconnected: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + break; + + default: + dev_err(&vscsi->dev, "adapter_idle: in invalid state %d\n", + vscsi->state); + break; + } + + if (free_qs) + ibmvscsis_free_cmd_qs(vscsi); + + /* + * There is a timing window where we could lose a disconnect request. + * The known path to this window occurs during the DISCONNECT_RECONNECT + * case above: reset_queue calls free_command_q, which will release the + * interrupt lock. During that time, a new post_disconnect call can be + * made with a "more severe" state (DISCONNECT or UNCONFIGURING). + * Because the DISCONNECT_SCHEDULED flag is already set, post_disconnect + * will only set the new_state. Now free_command_q reacquires the intr + * lock and clears the DISCONNECT_SCHEDULED flag (using PRESERVE_FLAG_ + * FIELDS), and the disconnect is lost. This is particularly bad when + * the new disconnect was for UNCONFIGURING, since the unconfigure hangs + * forever. + * Fix is that free command queue sets acr state and acr flags if there + * is a change under the lock + * note free command queue writes to this state it clears it + * before releasing the lock, different drivers call the free command + * queue different times so dont initialize above + */ + if (vscsi->phyp_acr_state != 0) { + /* + * set any bits in flags that may have been cleared by + * a call to free command queue in switch statement + * or reset queue + */ + vscsi->flags |= vscsi->phyp_acr_flags; + ibmvscsis_post_disconnect(vscsi, vscsi->phyp_acr_state, 0); + vscsi->phyp_acr_state = 0; + vscsi->phyp_acr_flags = 0; + + pr_debug("adapter_idle: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n", + vscsi->flags, vscsi->state, vscsi->phyp_acr_flags, + vscsi->phyp_acr_state); + } + + pr_debug("Leaving adapter_idle: flags 0x%x, state 0x%hx, new_state 0x%x\n", + vscsi->flags, vscsi->state, vscsi->new_state); +} + +/** + * ibmvscsis_copy_crq_packet() - Copy CRQ Packet + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element to use to process the request + * @crq: Pointer to CRQ entry containing the request + * + * Copy the srp information unit from the hosted + * partition using remote dma + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_copy_crq_packet(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + long rc = 0; + u16 len; + + len = be16_to_cpu(crq->IU_length); + if ((len > SRP_MAX_IU_LEN) || (len == 0)) { + dev_err(&vscsi->dev, "copy_crq: Invalid len %d passed", len); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return SRP_VIOLATION; + } + + rc = h_copy_rdma(len, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(crq->IU_data_ptr), + vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma); + + switch (rc) { + case H_SUCCESS: + cmd->init_time = mftb(); + iue->remote_token = crq->IU_data_ptr; + iue->iu_len = len; + pr_debug("copy_crq: ioba 0x%llx, init_time 0x%llx\n", + be64_to_cpu(crq->IU_data_ptr), cmd->init_time); + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + (RESPONSE_Q_DOWN | + CLIENT_FAILED)); + else + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + + dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n", + rc); + break; + case H_DEST_PARM: + case H_SOURCE_PARM: + default: + dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_adapter_info - Service an Adapter Info MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the Adapter Info MAD request + * + * EXECUTION ENVIRONMENT: + * Interrupt adpater lock is held + */ +static long ibmvscsis_adapter_info(struct scsi_info *vscsi, + struct iu_entry *iue) +{ + struct viosrp_adapter_info *mad = &vio_iu(iue)->mad.adapter_info; + struct mad_adapter_info_data *info; + uint flag_bits = 0; + dma_addr_t token; + long rc; + + mad->common.status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + + if (be16_to_cpu(mad->common.length) > sizeof(*info)) { + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + info = dma_alloc_coherent(&vscsi->dma_dev->dev, sizeof(*info), &token, + GFP_KERNEL); + if (!info) { + dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", + iue->target); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + /* Get remote info */ + rc = h_copy_rdma(be16_to_cpu(mad->common.length), + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer), + vscsi->dds.window[LOCAL].liobn, token); + + if (rc != H_SUCCESS) { + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + } + pr_warn("adapter_info: h_copy_rdma from client failed, rc %ld\n", + rc); + pr_debug("adapter_info: ioba 0x%llx, flags 0x%x, flag_bits 0x%x\n", + be64_to_cpu(mad->buffer), vscsi->flags, flag_bits); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + goto free_dma; + } + + /* + * Copy client info, but ignore partition number, which we + * already got from phyp - unless we failed to get it from + * phyp (e.g. if we're running on a p5 system). + */ + if (vscsi->client_data.partition_number == 0) + vscsi->client_data.partition_number = + be32_to_cpu(info->partition_number); + strncpy(vscsi->client_data.srp_version, info->srp_version, + sizeof(vscsi->client_data.srp_version)); + strncpy(vscsi->client_data.partition_name, info->partition_name, + sizeof(vscsi->client_data.partition_name)); + vscsi->client_data.mad_version = be32_to_cpu(info->mad_version); + vscsi->client_data.os_type = be32_to_cpu(info->os_type); + + /* Copy our info */ + strncpy(info->srp_version, SRP_VERSION, + sizeof(info->srp_version)); + strncpy(info->partition_name, vscsi->dds.partition_name, + sizeof(info->partition_name)); + info->partition_number = cpu_to_be32(vscsi->dds.partition_num); + info->mad_version = cpu_to_be32(MAD_VERSION_1); + info->os_type = cpu_to_be32(LINUX); + memset(&info->port_max_txu[0], 0, sizeof(info->port_max_txu)); + info->port_max_txu[0] = cpu_to_be32(128 * PAGE_SIZE); + + dma_wmb(); + rc = h_copy_rdma(sizeof(*info), vscsi->dds.window[LOCAL].liobn, + token, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer)); + switch (rc) { + case H_SUCCESS: + break; + + case H_SOURCE_PARM: + case H_DEST_PARM: + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + default: + dev_err(&vscsi->dev, "adapter_info: h_copy_rdma to client failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + } + +free_dma: + dma_free_coherent(&vscsi->dma_dev->dev, sizeof(*info), info, token); + pr_debug("Leaving adapter_info, rc %ld\n", rc); + + return rc; +} + +/** + * ibmvscsis_cap_mad() - Service a Capabilities MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the Capabilities MAD request + * + * NOTE: if you return an error from this routine you must be + * disconnecting or you will cause a hang + * + * EXECUTION ENVIRONMENT: + * Interrupt called with adapter lock held + */ +static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue) +{ + struct viosrp_capabilities *mad = &vio_iu(iue)->mad.capabilities; + struct capabilities *cap; + struct mad_capability_common *common; + dma_addr_t token; + u16 olen, len, status, min_len, cap_len; + u32 flag; + uint flag_bits = 0; + long rc = 0; + + olen = be16_to_cpu(mad->common.length); + /* + * struct capabilities hardcodes a couple capabilities after the + * header, but the capabilities can actually be in any order. + */ + min_len = offsetof(struct capabilities, migration); + if ((olen < min_len) || (olen > PAGE_SIZE)) { + pr_warn("cap_mad: invalid len %d\n", olen); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + cap = dma_alloc_coherent(&vscsi->dma_dev->dev, olen, &token, + GFP_KERNEL); + if (!cap) { + dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", + iue->target); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + rc = h_copy_rdma(olen, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer), + vscsi->dds.window[LOCAL].liobn, token); + if (rc == H_SUCCESS) { + strncpy(cap->name, dev_name(&vscsi->dma_dev->dev), + SRP_MAX_LOC_LEN); + + len = olen - min_len; + status = VIOSRP_MAD_SUCCESS; + common = (struct mad_capability_common *)&cap->migration; + + while ((len > 0) && (status == VIOSRP_MAD_SUCCESS) && !rc) { + pr_debug("cap_mad: len left %hd, cap type %d, cap len %hd\n", + len, be32_to_cpu(common->cap_type), + be16_to_cpu(common->length)); + + cap_len = be16_to_cpu(common->length); + if (cap_len > len) { + dev_err(&vscsi->dev, "cap_mad: cap len mismatch with total len\n"); + status = VIOSRP_MAD_FAILED; + break; + } + + if (cap_len == 0) { + dev_err(&vscsi->dev, "cap_mad: cap len is 0\n"); + status = VIOSRP_MAD_FAILED; + break; + } + + switch (common->cap_type) { + default: + pr_debug("cap_mad: unsupported capability\n"); + common->server_support = 0; + flag = cpu_to_be32((u32)CAP_LIST_SUPPORTED); + cap->flags &= ~flag; + break; + } + + len = len - cap_len; + common = (struct mad_capability_common *) + ((char *)common + cap_len); + } + + mad->common.status = cpu_to_be16(status); + + dma_wmb(); + rc = h_copy_rdma(olen, vscsi->dds.window[LOCAL].liobn, token, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer)); + + if (rc != H_SUCCESS) { + pr_debug("cap_mad: failed to copy to client, rc %ld\n", + rc); + + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | + CLIENT_FAILED); + } + + pr_warn("cap_mad: error copying data to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + flag_bits); + } + } + + dma_free_coherent(&vscsi->dma_dev->dev, olen, cap, token); + + pr_debug("Leaving cap_mad, rc %ld, client_cap 0x%x\n", + rc, vscsi->client_cap); + + return rc; +} + +/** + * ibmvscsis_process_mad() - Service a MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the MAD request + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_process_mad(struct scsi_info *vscsi, struct iu_entry *iue) +{ + struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad; + struct viosrp_empty_iu *empty; + long rc = ADAPT_SUCCESS; + + switch (be32_to_cpu(mad->type)) { + case VIOSRP_EMPTY_IU_TYPE: + empty = &vio_iu(iue)->mad.empty_iu; + vscsi->empty_iu_id = be64_to_cpu(empty->buffer); + vscsi->empty_iu_tag = be64_to_cpu(empty->common.tag); + mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + break; + case VIOSRP_ADAPTER_INFO_TYPE: + rc = ibmvscsis_adapter_info(vscsi, iue); + break; + case VIOSRP_CAPABILITIES_TYPE: + rc = ibmvscsis_cap_mad(vscsi, iue); + break; + case VIOSRP_ENABLE_FAST_FAIL: + if (vscsi->state == CONNECTED) { + vscsi->fast_fail = true; + mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + } else { + pr_warn("fast fail mad sent after login\n"); + mad->status = cpu_to_be16(VIOSRP_MAD_FAILED); + } + break; + default: + mad->status = cpu_to_be16(VIOSRP_MAD_NOT_SUPPORTED); + break; + } + + return rc; +} + +/** + * srp_snd_msg_failed() - Handle an error when sending a response + * @vscsi: Pointer to our adapter structure + * @rc: The return code from the h_send_crq command + * + * Must be called with interrupt lock held. + */ +static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc) +{ + ktime_t kt; + + if (rc != H_DROPPED) { + ibmvscsis_free_cmd_qs(vscsi); + + if (rc == H_CLOSED) + vscsi->flags |= CLIENT_FAILED; + + /* don't flag the same problem multiple times */ + if (!(vscsi->flags & RESPONSE_Q_DOWN)) { + vscsi->flags |= RESPONSE_Q_DOWN; + if (!(vscsi->state & (ERR_DISCONNECT | + ERR_DISCONNECT_RECONNECT | + ERR_DISCONNECTED | UNDEFINED))) { + dev_err(&vscsi->dev, "snd_msg_failed: setting RESPONSE_Q_DOWN, state 0x%hx, flags 0x%x, rc %ld\n", + vscsi->state, vscsi->flags, rc); + } + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + } + return; + } + + /* + * The response queue is full. + * If the server is processing SRP requests, i.e. + * the client has successfully done an + * SRP_LOGIN, then it will wait forever for room in + * the queue. However if the system admin + * is attempting to unconfigure the server then one + * or more children will be in a state where + * they are being removed. So if there is even one + * child being removed then the driver assumes + * the system admin is attempting to break the + * connection with the client and MAX_TIMER_POPS + * is honored. + */ + if ((vscsi->rsp_q_timer.timer_pops < MAX_TIMER_POPS) || + (vscsi->state == SRP_PROCESSING)) { + pr_debug("snd_msg_failed: response queue full, flags 0x%x, timer started %d, pops %d\n", + vscsi->flags, (int)vscsi->rsp_q_timer.started, + vscsi->rsp_q_timer.timer_pops); + + /* + * Check if the timer is running; if it + * is not then start it up. + */ + if (!vscsi->rsp_q_timer.started) { + if (vscsi->rsp_q_timer.timer_pops < + MAX_TIMER_POPS) { + kt = ktime_set(0, WAIT_NANO_SECONDS); + } else { + /* + * slide the timeslice if the maximum + * timer pops have already happened + */ + kt = ktime_set(WAIT_SECONDS, 0); + } + + vscsi->rsp_q_timer.started = true; + hrtimer_start(&vscsi->rsp_q_timer.timer, kt, + HRTIMER_MODE_REL); + } + } else { + /* + * TBD: Do we need to worry about this? Need to get + * remove working. + */ + /* + * waited a long time and it appears the system admin + * is bring this driver down + */ + vscsi->flags |= RESPONSE_Q_DOWN; + ibmvscsis_free_cmd_qs(vscsi); + /* + * if the driver is already attempting to disconnect + * from the client and has already logged an error + * trace this event but don't put it in the error log + */ + if (!(vscsi->state & (ERR_DISCONNECT | + ERR_DISCONNECT_RECONNECT | + ERR_DISCONNECTED | UNDEFINED))) { + dev_err(&vscsi->dev, "client crq full too long\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + 0); + } + } +} + +/** + * ibmvscsis_send_messages() - Send a Response + * @vscsi: Pointer to our adapter structure + * + * Send a response, first checking the waiting queue. Responses are + * sent in order they are received. If the response cannot be sent, + * because the client queue is full, it stays on the waiting queue. + * + * PRECONDITION: + * Called with interrupt lock held + */ +static void ibmvscsis_send_messages(struct scsi_info *vscsi) +{ + u64 msg_hi = 0; + /* note do not attmempt to access the IU_data_ptr with this pointer + * it is not valid + */ + struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi; + struct ibmvscsis_cmd *cmd, *nxt; + struct iu_entry *iue; + long rc = ADAPT_SUCCESS; + + if (!(vscsi->flags & RESPONSE_Q_DOWN)) { + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { + pr_debug("send_messages cmd %p\n", cmd); + + iue = cmd->iue; + + crq->valid = VALID_CMD_RESP_EL; + crq->format = cmd->rsp.format; + + if (cmd->flags & CMD_FAST_FAIL) + crq->status = VIOSRP_ADAPTER_FAIL; + + crq->IU_length = cpu_to_be16(cmd->rsp.len); + + rc = h_send_crq(vscsi->dma_dev->unit_address, + be64_to_cpu(msg_hi), + be64_to_cpu(cmd->rsp.tag)); + + pr_debug("send_messages: tag 0x%llx, rc %ld\n", + be64_to_cpu(cmd->rsp.tag), rc); + + /* if all ok free up the command element resources */ + if (rc == H_SUCCESS) { + /* some movement has occurred */ + vscsi->rsp_q_timer.timer_pops = 0; + list_del(&cmd->list); + + ibmvscsis_free_cmd_resources(vscsi, cmd); + } else { + srp_snd_msg_failed(vscsi, rc); + break; + } + } + + if (!rc) { + /* + * The timer could pop with the queue empty. If + * this happens, rc will always indicate a + * success; clear the pop count. + */ + vscsi->rsp_q_timer.timer_pops = 0; + } + } else { + ibmvscsis_free_cmd_qs(vscsi); + } +} + +/* Called with intr lock held */ +static void ibmvscsis_send_mad_resp(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad; + uint flag_bits = 0; + long rc; + + dma_wmb(); + rc = h_copy_rdma(sizeof(struct mad_common), + vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(crq->IU_data_ptr)); + if (!rc) { + cmd->rsp.format = VIOSRP_MAD_FORMAT; + cmd->rsp.len = sizeof(struct mad_common); + cmd->rsp.tag = mad->tag; + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + } else { + pr_debug("Error sending mad response, rc %ld\n", rc); + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + } + dev_err(&vscsi->dev, "mad: failed to copy to client, rc %ld\n", + rc); + + ibmvscsis_free_cmd_resources(vscsi, cmd); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + } +} + +/** + * ibmvscsis_mad() - Service a MAnagement Data gram. + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to the CRQ entry containing the MAD request + * + * EXECUTION ENVIRONMENT: + * Interrupt called with adapter lock held + */ +static long ibmvscsis_mad(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + struct iu_entry *iue; + struct ibmvscsis_cmd *cmd; + struct mad_common *mad; + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + /* + * We have not exchanged Init Msgs yet, so this MAD was sent + * before the last Transport Event; client will not be + * expecting a response. + */ + case WAIT_CONNECTION: + pr_debug("mad: in Wait Connection state, ignoring MAD, flags %d\n", + vscsi->flags); + return ADAPT_SUCCESS; + + case SRP_PROCESSING: + case CONNECTED: + break; + + /* + * We should never get here while we're in these states. + * Just log an error and get out. + */ + case UNCONFIGURING: + case WAIT_IDLE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + default: + dev_err(&vscsi->dev, "mad: invalid adapter state %d for mad\n", + vscsi->state); + return ADAPT_SUCCESS; + } + + cmd = ibmvscsis_get_free_cmd(vscsi); + if (!cmd) { + dev_err(&vscsi->dev, "mad: failed to get cmd, debit %d\n", + vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return ERROR; + } + iue = cmd->iue; + cmd->type = ADAPTER_MAD; + + rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq); + if (!rc) { + mad = (struct mad_common *)&vio_iu(iue)->mad; + + pr_debug("mad: type %d\n", be32_to_cpu(mad->type)); + + if (be16_to_cpu(mad->length) < 0) { + dev_err(&vscsi->dev, "mad: length is < 0\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + rc = SRP_VIOLATION; + } else { + rc = ibmvscsis_process_mad(vscsi, iue); + } + + pr_debug("mad: status %hd, rc %ld\n", be16_to_cpu(mad->status), + rc); + + if (!rc) + ibmvscsis_send_mad_resp(vscsi, cmd, crq); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + } + + pr_debug("Leaving mad, rc %ld\n", rc); + return rc; +} + +/** + * ibmvscsis_login_rsp() - Create/copy a login response notice to the client + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to the command for the SRP Login request + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_login_rsp(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_rsp *rsp = &vio_iu(iue)->srp.login_rsp; + struct format_code *fmt; + uint flag_bits = 0; + long rc = ADAPT_SUCCESS; + + memset(rsp, 0, sizeof(struct srp_login_rsp)); + + rsp->opcode = SRP_LOGIN_RSP; + rsp->req_lim_delta = cpu_to_be32(vscsi->request_limit); + rsp->tag = cmd->rsp.tag; + rsp->max_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + rsp->max_ti_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + fmt = (struct format_code *)&rsp->buf_fmt; + fmt->buffers = SUPPORTED_FORMATS; + vscsi->credit = 0; + + cmd->rsp.len = sizeof(struct srp_login_rsp); + + dma_wmb(); + rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn, + iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + break; + + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; + dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_srp_login_rej() - Create/copy a login rejection notice to client + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to the command for the SRP Login request + * @reason: The reason the SRP Login is being rejected, per SRP protocol + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_srp_login_rej(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, u32 reason) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_rej *rej = &vio_iu(iue)->srp.login_rej; + struct format_code *fmt; + uint flag_bits = 0; + long rc = ADAPT_SUCCESS; + + memset(rej, 0, sizeof(*rej)); + + rej->opcode = SRP_LOGIN_REJ; + rej->reason = cpu_to_be32(reason); + rej->tag = cmd->rsp.tag; + fmt = (struct format_code *)&rej->buf_fmt; + fmt->buffers = SUPPORTED_FORMATS; + + cmd->rsp.len = sizeof(*rej); + + dma_wmb(); + rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn, + iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; + dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +static int ibmvscsis_make_nexus(struct ibmvscsis_tport *tport) +{ + char *name = tport->tport_name; + struct ibmvscsis_nexus *nexus; + int rc; + + if (tport->ibmv_nexus) { + pr_debug("tport->ibmv_nexus already exists\n"); + return 0; + } + + nexus = kzalloc(sizeof(*nexus), GFP_KERNEL); + if (!nexus) { + pr_err("Unable to allocate struct ibmvscsis_nexus\n"); + return -ENOMEM; + } + + nexus->se_sess = target_alloc_session(&tport->se_tpg, 0, 0, + TARGET_PROT_NORMAL, name, nexus, + NULL); + if (IS_ERR(nexus->se_sess)) { + rc = PTR_ERR(nexus->se_sess); + goto transport_init_fail; + } + + tport->ibmv_nexus = nexus; + + return 0; + +transport_init_fail: + kfree(nexus); + return rc; +} + +static int ibmvscsis_drop_nexus(struct ibmvscsis_tport *tport) +{ + struct se_session *se_sess; + struct ibmvscsis_nexus *nexus; + + nexus = tport->ibmv_nexus; + if (!nexus) + return -ENODEV; + + se_sess = nexus->se_sess; + if (!se_sess) + return -ENODEV; + + /* + * Release the SCSI I_T Nexus to the emulated ibmvscsis Target Port + */ + transport_deregister_session(se_sess); + tport->ibmv_nexus = NULL; + kfree(nexus); + + return 0; +} + +/** + * ibmvscsis_srp_login() - Process an SRP Login Request + * @vscsi: Pointer to our adapter structure + * @cmd: Command element to use to process the SRP Login request + * @crq: Pointer to CRQ entry containing the SRP Login request + * + * EXECUTION ENVIRONMENT: + * Interrupt, called with interrupt lock held + */ +static long ibmvscsis_srp_login(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_req *req = &vio_iu(iue)->srp.login_req; + struct port_id { + __be64 id_extension; + __be64 io_guid; + } *iport, *tport; + struct format_code *fmt; + u32 reason = 0x0; + long rc = ADAPT_SUCCESS; + + iport = (struct port_id *)req->initiator_port_id; + tport = (struct port_id *)req->target_port_id; + fmt = (struct format_code *)&req->req_buf_fmt; + if (be32_to_cpu(req->req_it_iu_len) > SRP_MAX_IU_LEN) + reason = SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE; + else if (be32_to_cpu(req->req_it_iu_len) < 64) + reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL; + else if ((be64_to_cpu(iport->id_extension) > (MAX_NUM_PORTS - 1)) || + (be64_to_cpu(tport->id_extension) > (MAX_NUM_PORTS - 1))) + reason = SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL; + else if (req->req_flags & SRP_MULTICHAN_MULTI) + reason = SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED; + else if (fmt->buffers & (~SUPPORTED_FORMATS)) + reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT; + else if ((fmt->buffers | SUPPORTED_FORMATS) == 0) + reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT; + + if (vscsi->state == SRP_PROCESSING) + reason = SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED; + + rc = ibmvscsis_make_nexus(&vscsi->tport); + if (rc) + reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL; + + cmd->rsp.format = VIOSRP_SRP_FORMAT; + cmd->rsp.tag = req->tag; + + pr_debug("srp_login: reason 0x%x\n", reason); + + if (reason) + rc = ibmvscsis_srp_login_rej(vscsi, cmd, reason); + else + rc = ibmvscsis_login_rsp(vscsi, cmd); + + if (!rc) { + if (!reason) + vscsi->state = SRP_PROCESSING; + + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + } + + pr_debug("Leaving srp_login, rc %ld\n", rc); + return rc; +} + +/** + * ibmvscsis_srp_i_logout() - Helper Function to close I_T Nexus + * @vscsi: Pointer to our adapter structure + * @cmd: Command element to use to process the Implicit Logout request + * @crq: Pointer to CRQ entry containing the Implicit Logout request + * + * Do the logic to close the I_T nexus. This function may not + * behave to specification. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_srp_i_logout(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct srp_i_logout *log_out = &vio_iu(iue)->srp.i_logout; + long rc = ADAPT_SUCCESS; + + if ((vscsi->debit > 0) || !list_empty(&vscsi->schedule_q) || + !list_empty(&vscsi->waiting_rsp)) { + dev_err(&vscsi->dev, "i_logout: outstanding work\n"); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + } else { + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.tag = log_out->tag; + cmd->rsp.len = sizeof(struct mad_common); + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, 0); + } + + return rc; +} + +/* Called with intr lock held */ +static void ibmvscsis_srp_cmd(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + struct ibmvscsis_cmd *cmd; + struct iu_entry *iue; + struct srp_cmd *srp; + struct srp_tsk_mgmt *tsk; + long rc; + + if (vscsi->request_limit - vscsi->debit <= 0) { + /* Client has exceeded request limit */ + dev_err(&vscsi->dev, "Client exceeded the request limit (%d), debit %d\n", + vscsi->request_limit, vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return; + } + + cmd = ibmvscsis_get_free_cmd(vscsi); + if (!cmd) { + dev_err(&vscsi->dev, "srp_cmd failed to get cmd, debit %d\n", + vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return; + } + iue = cmd->iue; + srp = &vio_iu(iue)->srp.cmd; + + rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq); + if (rc) { + ibmvscsis_free_cmd_resources(vscsi, cmd); + return; + } + + if (vscsi->state == SRP_PROCESSING) { + switch (srp->opcode) { + case SRP_LOGIN_REQ: + rc = ibmvscsis_srp_login(vscsi, cmd, crq); + break; + + case SRP_TSK_MGMT: + tsk = &vio_iu(iue)->srp.tsk_mgmt; + pr_debug("tsk_mgmt tag: %llu (0x%llx)\n", tsk->tag, + tsk->tag); + cmd->rsp.tag = tsk->tag; + vscsi->debit += 1; + cmd->type = TASK_MANAGEMENT; + list_add_tail(&cmd->list, &vscsi->schedule_q); + queue_work(vscsi->work_q, &cmd->work); + break; + + case SRP_CMD: + pr_debug("srp_cmd tag: %llu (0x%llx)\n", srp->tag, + srp->tag); + cmd->rsp.tag = srp->tag; + vscsi->debit += 1; + cmd->type = SCSI_CDB; + /* + * We want to keep track of work waiting for + * the workqueue. + */ + list_add_tail(&cmd->list, &vscsi->schedule_q); + queue_work(vscsi->work_q, &cmd->work); + break; + + case SRP_I_LOGOUT: + rc = ibmvscsis_srp_i_logout(vscsi, cmd, crq); + break; + + case SRP_CRED_RSP: + case SRP_AER_RSP: + default: + ibmvscsis_free_cmd_resources(vscsi, cmd); + dev_err(&vscsi->dev, "invalid srp cmd, opcode %d\n", + (uint)srp->opcode); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + } + } else if (srp->opcode == SRP_LOGIN_REQ && vscsi->state == CONNECTED) { + rc = ibmvscsis_srp_login(vscsi, cmd, crq); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + dev_err(&vscsi->dev, "Invalid state %d to handle srp cmd\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + } +} + +/** + * ibmvscsis_ping_response() - Respond to a ping request + * @vscsi: Pointer to our adapter structure + * + * Let the client know that the server is alive and waiting on + * its native I/O stack. + * If any type of error occurs from the call to queue a ping + * response then the client is either not accepting or receiving + * interrupts. Disconnect with an error. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_ping_response(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long rc; + + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_CMD_RESP_EL; + crq->format = (u8)MESSAGE_IN_CRQ; + crq->status = PING_RESPONSE; + + rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + switch (rc) { + case H_SUCCESS: + break; + case H_CLOSED: + vscsi->flags |= CLIENT_FAILED; + case H_DROPPED: + vscsi->flags |= RESPONSE_Q_DOWN; + case H_REMOTE_PARM: + dev_err(&vscsi->dev, "ping_response: h_send_crq failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + default: + dev_err(&vscsi->dev, "ping_response: h_send_crq returned unknown rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_handle_init_compl_msg() - Respond to an Init Complete Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + case UNCONFIGURING: + case UNDEFINED: + rc = ERROR; + break; + + case WAIT_CONNECTION: + vscsi->state = CONNECTED; + break; + + case WAIT_IDLE: + case SRP_PROCESSING: + case CONNECTED: + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_handle_init_msg() - Respond to an Init Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case WAIT_ENABLED: + vscsi->state = PART_UP_WAIT_ENAB; + break; + + case WAIT_CONNECTION: + rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); + switch (rc) { + case H_SUCCESS: + vscsi->state = CONNECTED; + break; + + case H_PARAMETER: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + break; + + case H_DROPPED: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + rc = ERROR; + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + + case H_CLOSED: + pr_warn("init_msg: failed to send, rc %ld\n", rc); + rc = 0; + break; + } + break; + + case UNDEFINED: + rc = ERROR; + break; + + case UNCONFIGURING: + break; + + case PART_UP_WAIT_ENAB: + case CONNECTED: + case SRP_PROCESSING: + case WAIT_IDLE: + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_init_msg() - Respond to an init message + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ element containing the Init Message + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + pr_debug("init_msg: state 0x%hx\n", vscsi->state); + + rc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, + (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, + 0); + if (rc == H_SUCCESS) { + vscsi->client_data.partition_number = + be64_to_cpu(*(u64 *)vscsi->map_buf); + pr_debug("init_msg, part num %d\n", + vscsi->client_data.partition_number); + } else { + pr_debug("init_msg h_vioctl rc %ld\n", rc); + rc = ADAPT_SUCCESS; + } + + if (crq->format == INIT_MSG) { + rc = ibmvscsis_handle_init_msg(vscsi); + } else if (crq->format == INIT_COMPLETE_MSG) { + rc = ibmvscsis_handle_init_compl_msg(vscsi); + } else { + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + } + + return rc; +} + +/** + * ibmvscsis_parse_command() - Parse an element taken from the cmd rsp queue. + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ element containing the SRP request + * + * This function will return success if the command queue element is valid + * and the srp iu or MAD request it pointed to was also valid. That does + * not mean that an error was not returned to the client. + * + * EXECUTION ENVIRONMENT: + * Interrupt, intr lock held + */ +static long ibmvscsis_parse_command(struct scsi_info *vscsi, + struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + switch (crq->valid) { + case VALID_CMD_RESP_EL: + switch (crq->format) { + case OS400_FORMAT: + case AIX_FORMAT: + case LINUX_FORMAT: + case MAD_FORMAT: + if (vscsi->flags & PROCESSING_MAD) { + rc = ERROR; + dev_err(&vscsi->dev, "parse_command: already processing mad\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + 0); + } else { + vscsi->flags |= PROCESSING_MAD; + rc = ibmvscsis_mad(vscsi, crq); + } + break; + + case SRP_FORMAT: + ibmvscsis_srp_cmd(vscsi, crq); + break; + + case MESSAGE_IN_CRQ: + if (crq->status == PING) + ibmvscsis_ping_response(vscsi); + break; + + default: + dev_err(&vscsi->dev, "parse_command: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + } + break; + + case VALID_TRANS_EVENT: + rc = ibmvscsis_trans_event(vscsi, crq); + break; + + case VALID_INIT_MSG: + rc = ibmvscsis_init_msg(vscsi, crq); + break; + + default: + dev_err(&vscsi->dev, "parse_command: invalid valid field %d\n", + (uint)crq->valid); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + /* + * Return only what the interrupt handler cares + * about. Most errors we keep right on trucking. + */ + rc = vscsi->flags & SCHEDULE_DISCONNECT; + + return rc; +} + +static int read_dma_window(struct scsi_info *vscsi) +{ + struct vio_dev *vdev = vscsi->dma_dev; + const __be32 *dma_window; + const __be32 *prop; + + /* TODO Using of_parse_dma_window would be better, but it doesn't give + * a way to read multiple windows without already knowing the size of + * a window or the number of windows. + */ + dma_window = (const __be32 *)vio_get_attribute(vdev, + "ibm,my-dma-window", + NULL); + if (!dma_window) { + pr_err("Couldn't find ibm,my-dma-window property\n"); + return -1; + } + + vscsi->dds.window[LOCAL].liobn = be32_to_cpu(*dma_window); + dma_window++; + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells", + NULL); + if (!prop) { + pr_warn("Couldn't find ibm,#dma-address-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells", + NULL); + if (!prop) { + pr_warn("Couldn't find ibm,#dma-size-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + /* dma_window should point to the second window now */ + vscsi->dds.window[REMOTE].liobn = be32_to_cpu(*dma_window); + + return 0; +} + +static struct ibmvscsis_tport *ibmvscsis_lookup_port(const char *name) +{ + struct ibmvscsis_tport *tport = NULL; + struct vio_dev *vdev; + struct scsi_info *vscsi; + + spin_lock_bh(&ibmvscsis_dev_lock); + list_for_each_entry(vscsi, &ibmvscsis_dev_list, list) { + vdev = vscsi->dma_dev; + if (!strcmp(dev_name(&vdev->dev), name)) { + tport = &vscsi->tport; + break; + } + } + spin_unlock_bh(&ibmvscsis_dev_lock); + + return tport; +} + +/** + * ibmvscsis_parse_cmd() - Parse SRP Command + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element with SRP command + * + * Parse the srp command; if it is valid then submit it to tcm. + * Note: The return code does not reflect the status of the SCSI CDB. + * + * EXECUTION ENVIRONMENT: + * Process level + */ +static void ibmvscsis_parse_cmd(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + struct ibmvscsis_nexus *nexus; + u64 data_len = 0; + enum dma_data_direction dir; + int attr = 0; + int rc = 0; + + nexus = vscsi->tport.ibmv_nexus; + /* + * additional length in bytes. Note that the SRP spec says that + * additional length is in 4-byte words, but technically the + * additional length field is only the upper 6 bits of the byte. + * The lower 2 bits are reserved. If the lower 2 bits are 0 (as + * all reserved fields should be), then interpreting the byte as + * an int will yield the length in bytes. + */ + if (srp->add_cdb_len & 0x03) { + dev_err(&vscsi->dev, "parse_cmd: reserved bits set in IU\n"); + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + ibmvscsis_free_cmd_resources(vscsi, cmd); + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + if (srp_get_desc_table(srp, &dir, &data_len)) { + dev_err(&vscsi->dev, "0x%llx: parsing SRP descriptor table failed.\n", + srp->tag); + goto fail; + return; + } + + cmd->rsp.sol_not = srp->sol_not; + + switch (srp->task_attr) { + case SRP_SIMPLE_TASK: + attr = TCM_SIMPLE_TAG; + break; + case SRP_ORDERED_TASK: + attr = TCM_ORDERED_TAG; + break; + case SRP_HEAD_TASK: + attr = TCM_HEAD_TAG; + break; + case SRP_ACA_TASK: + attr = TCM_ACA_TAG; + break; + default: + dev_err(&vscsi->dev, "Invalid task attribute %d\n", + srp->task_attr); + goto fail; + } + + cmd->se_cmd.tag = be64_to_cpu(srp->tag); + + spin_lock_bh(&vscsi->intr_lock); + list_add_tail(&cmd->list, &vscsi->active_q); + spin_unlock_bh(&vscsi->intr_lock); + + srp->lun.scsi_lun[0] &= 0x3f; + + pr_debug("calling submit_cmd, se_cmd %p, lun 0x%llx, cdb 0x%x, attr:%d\n", + &cmd->se_cmd, scsilun_to_int(&srp->lun), (int)srp->cdb[0], + attr); + + rc = target_submit_cmd(&cmd->se_cmd, nexus->se_sess, srp->cdb, + cmd->sense_buf, scsilun_to_int(&srp->lun), + data_len, attr, dir, 0); + if (rc) { + dev_err(&vscsi->dev, "target_submit_cmd failed, rc %d\n", rc); + goto fail; + } + return; + +fail: + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + spin_unlock_bh(&vscsi->intr_lock); +} + +/** + * ibmvscsis_parse_task() - Parse SRP Task Management Request + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element with SRP task management request + * + * Parse the srp task management request; if it is valid then submit it to tcm. + * Note: The return code does not reflect the status of the task management + * request. + * + * EXECUTION ENVIRONMENT: + * Processor level + */ +static void ibmvscsis_parse_task(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt; + int tcm_type; + u64 tag_to_abort = 0; + int rc = 0; + struct ibmvscsis_nexus *nexus; + + nexus = vscsi->tport.ibmv_nexus; + + cmd->rsp.sol_not = srp_tsk->sol_not; + + switch (srp_tsk->tsk_mgmt_func) { + case SRP_TSK_ABORT_TASK: + tcm_type = TMR_ABORT_TASK; + tag_to_abort = be64_to_cpu(srp_tsk->task_tag); + break; + case SRP_TSK_ABORT_TASK_SET: + tcm_type = TMR_ABORT_TASK_SET; + break; + case SRP_TSK_CLEAR_TASK_SET: + tcm_type = TMR_CLEAR_TASK_SET; + break; + case SRP_TSK_LUN_RESET: + tcm_type = TMR_LUN_RESET; + break; + case SRP_TSK_CLEAR_ACA: + tcm_type = TMR_CLEAR_ACA; + break; + default: + dev_err(&vscsi->dev, "unknown task mgmt func %d\n", + srp_tsk->tsk_mgmt_func); + cmd->se_cmd.se_tmr_req->response = + TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; + rc = -1; + break; + } + + if (!rc) { + cmd->se_cmd.tag = be64_to_cpu(srp_tsk->tag); + + spin_lock_bh(&vscsi->intr_lock); + list_add_tail(&cmd->list, &vscsi->active_q); + spin_unlock_bh(&vscsi->intr_lock); + + srp_tsk->lun.scsi_lun[0] &= 0x3f; + + pr_debug("calling submit_tmr, func %d\n", + srp_tsk->tsk_mgmt_func); + rc = target_submit_tmr(&cmd->se_cmd, nexus->se_sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, + tcm_type, GFP_KERNEL, tag_to_abort, 0); + if (rc) { + dev_err(&vscsi->dev, "target_submit_tmr failed, rc %d\n", + rc); + cmd->se_cmd.se_tmr_req->response = + TMR_FUNCTION_REJECTED; + } + } + + if (rc) + transport_send_check_condition_and_sense(&cmd->se_cmd, 0, 0); +} + +static void ibmvscsis_scheduler(struct work_struct *work) +{ + struct ibmvscsis_cmd *cmd = container_of(work, struct ibmvscsis_cmd, + work); + struct scsi_info *vscsi = cmd->adapter; + + spin_lock_bh(&vscsi->intr_lock); + + /* Remove from schedule_q */ + list_del(&cmd->list); + + /* Don't submit cmd if we're disconnecting */ + if (vscsi->flags & (SCHEDULE_DISCONNECT | DISCONNECT_SCHEDULED)) { + ibmvscsis_free_cmd_resources(vscsi, cmd); + + /* ibmvscsis_disconnect might be waiting for us */ + if (list_empty(&vscsi->active_q) && + list_empty(&vscsi->schedule_q) && + (vscsi->flags & WAIT_FOR_IDLE)) { + vscsi->flags &= ~WAIT_FOR_IDLE; + complete(&vscsi->wait_idle); + } + + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + spin_unlock_bh(&vscsi->intr_lock); + + switch (cmd->type) { + case SCSI_CDB: + ibmvscsis_parse_cmd(vscsi, cmd); + break; + case TASK_MANAGEMENT: + ibmvscsis_parse_task(vscsi, cmd); + break; + default: + dev_err(&vscsi->dev, "scheduler, invalid cmd type %d\n", + cmd->type); + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_free_cmd_resources(vscsi, cmd); + spin_unlock_bh(&vscsi->intr_lock); + break; + } +} + +static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num) +{ + struct ibmvscsis_cmd *cmd; + int i; + + INIT_LIST_HEAD(&vscsi->free_cmd); + vscsi->cmd_pool = kcalloc(num, sizeof(struct ibmvscsis_cmd), + GFP_KERNEL); + if (!vscsi->cmd_pool) + return -ENOMEM; + + for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num; + i++, cmd++) { + cmd->adapter = vscsi; + INIT_WORK(&cmd->work, ibmvscsis_scheduler); + list_add_tail(&cmd->list, &vscsi->free_cmd); + } + + return 0; +} + +static void ibmvscsis_free_cmds(struct scsi_info *vscsi) +{ + kfree(vscsi->cmd_pool); + vscsi->cmd_pool = NULL; + INIT_LIST_HEAD(&vscsi->free_cmd); +} + +/** + * ibmvscsis_service_wait_q() - Service Waiting Queue + * @timer: Pointer to timer which has expired + * + * This routine is called when the timer pops to service the waiting + * queue. Elements on the queue have completed, their responses have been + * copied to the client, but the client's response queue was full so + * the queue message could not be sent. The routine grabs the proper locks + * and calls send messages. + * + * EXECUTION ENVIRONMENT: + * called at interrupt level + */ +static enum hrtimer_restart ibmvscsis_service_wait_q(struct hrtimer *timer) +{ + struct timer_cb *p_timer = container_of(timer, struct timer_cb, timer); + struct scsi_info *vscsi = container_of(p_timer, struct scsi_info, + rsp_q_timer); + + spin_lock_bh(&vscsi->intr_lock); + p_timer->timer_pops += 1; + p_timer->started = false; + ibmvscsis_send_messages(vscsi); + spin_unlock_bh(&vscsi->intr_lock); + + return HRTIMER_NORESTART; +} + +static long ibmvscsis_alloctimer(struct scsi_info *vscsi) +{ + struct timer_cb *p_timer; + + p_timer = &vscsi->rsp_q_timer; + hrtimer_init(&p_timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + p_timer->timer.function = ibmvscsis_service_wait_q; + p_timer->started = false; + p_timer->timer_pops = 0; + + return ADAPT_SUCCESS; +} + +static void ibmvscsis_freetimer(struct scsi_info *vscsi) +{ + struct timer_cb *p_timer; + + p_timer = &vscsi->rsp_q_timer; + + (void)hrtimer_cancel(&p_timer->timer); + + p_timer->started = false; + p_timer->timer_pops = 0; +} + +static irqreturn_t ibmvscsis_interrupt(int dummy, void *data) +{ + struct scsi_info *vscsi = data; + + vio_disable_interrupts(vscsi->dma_dev); + tasklet_schedule(&vscsi->work_task); + + return IRQ_HANDLED; +} + +/** + * ibmvscsis_check_q() - Helper function to Check Init Message Valid + * @vscsi: Pointer to our adapter structure + * + * Checks if a initialize message was queued by the initiatior + * while the timing window was open. This function is called from + * probe after the CRQ is created and interrupts are enabled. + * It would only be used by adapters who wait for some event before + * completing the init handshake with the client. For ibmvscsi, this + * event is waiting for the port to be enabled. + * + * EXECUTION ENVIRONMENT: + * Process level only, interrupt lock held + */ +static long ibmvscsis_check_q(struct scsi_info *vscsi) +{ + uint format; + long rc; + + rc = ibmvscsis_check_init_msg(vscsi, &format); + if (rc) + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + else if (format == UNUSED_FORMAT) + vscsi->state = WAIT_ENABLED; + else + vscsi->state = PART_UP_WAIT_ENAB; + + return rc; +} + +/** + * ibmvscsis_enable_change_state() - Set new state based on enabled status + * @vscsi: Pointer to our adapter structure + * + * This function determines our new state now that we are enabled. This + * may involve sending an Init Complete message to the client. + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_enable_change_state(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + +handle_state_change: + switch (vscsi->state) { + case WAIT_ENABLED: + rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); + switch (rc) { + case H_SUCCESS: + case H_DROPPED: + case H_CLOSED: + vscsi->state = WAIT_CONNECTION; + rc = ADAPT_SUCCESS; + break; + + case H_PARAMETER: + break; + + case H_HARDWARE: + break; + + default: + vscsi->state = UNDEFINED; + rc = H_HARDWARE; + break; + } + break; + case PART_UP_WAIT_ENAB: + rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); + switch (rc) { + case H_SUCCESS: + vscsi->state = CONNECTED; + rc = ADAPT_SUCCESS; + break; + + case H_DROPPED: + case H_CLOSED: + vscsi->state = WAIT_ENABLED; + goto handle_state_change; + + case H_PARAMETER: + break; + + case H_HARDWARE: + break; + + default: + rc = H_HARDWARE; + break; + } + break; + + case WAIT_CONNECTION: + case WAIT_IDLE: + case SRP_PROCESSING: + case CONNECTED: + rc = ADAPT_SUCCESS; + break; + /* should not be able to get here */ + case UNCONFIGURING: + rc = ERROR; + vscsi->state = UNDEFINED; + break; + + /* driver should never allow this to happen */ + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + default: + dev_err(&vscsi->dev, "in invalid state %d during enable_change_state\n", + vscsi->state); + rc = ADAPT_SUCCESS; + break; + } + + return rc; +} + +/** + * ibmvscsis_create_command_q() - Create Command Queue + * @vscsi: Pointer to our adapter structure + * @num_cmds: Currently unused. In the future, may be used to determine + * the size of the CRQ. + * + * Allocates memory for command queue maps remote memory into an ioba + * initializes the command response queue + * + * EXECUTION ENVIRONMENT: + * Process level only + */ +static long ibmvscsis_create_command_q(struct scsi_info *vscsi, int num_cmds) +{ + long rc = 0; + int pages; + struct vio_dev *vdev = vscsi->dma_dev; + + /* We might support multiple pages in the future, but just 1 for now */ + pages = 1; + + vscsi->cmd_q.size = pages; + + vscsi->cmd_q.base_addr = + (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL); + if (!vscsi->cmd_q.base_addr) + return -ENOMEM; + + vscsi->cmd_q.mask = ((uint)pages * CRQ_PER_PAGE) - 1; + + vscsi->cmd_q.crq_token = dma_map_single(&vdev->dev, + vscsi->cmd_q.base_addr, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vdev->dev, vscsi->cmd_q.crq_token)) { + free_page((unsigned long)vscsi->cmd_q.base_addr); + return -ENOMEM; + } + + rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, PAGE_SIZE); + if (rc) { + if (rc == H_CLOSED) { + vscsi->state = WAIT_ENABLED; + rc = 0; + } else { + dma_unmap_single(&vdev->dev, vscsi->cmd_q.crq_token, + PAGE_SIZE, DMA_BIDIRECTIONAL); + free_page((unsigned long)vscsi->cmd_q.base_addr); + rc = -ENODEV; + } + } else { + vscsi->state = WAIT_ENABLED; + } + + return rc; +} + +/** + * ibmvscsis_destroy_command_q - Destroy Command Queue + * @vscsi: Pointer to our adapter structure + * + * Releases memory for command queue and unmaps mapped remote memory. + * + * EXECUTION ENVIRONMENT: + * Process level only + */ +static void ibmvscsis_destroy_command_q(struct scsi_info *vscsi) +{ + dma_unmap_single(&vscsi->dma_dev->dev, vscsi->cmd_q.crq_token, + PAGE_SIZE, DMA_BIDIRECTIONAL); + free_page((unsigned long)vscsi->cmd_q.base_addr); + vscsi->cmd_q.base_addr = NULL; + vscsi->state = NO_QUEUE; +} + +static u8 ibmvscsis_fast_fail(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + struct scsi_sense_hdr sshdr; + u8 rc = se_cmd->scsi_status; + + if (vscsi->fast_fail && (READ_CMD(srp->cdb) || WRITE_CMD(srp->cdb))) + if (scsi_normalize_sense(se_cmd->sense_buffer, + se_cmd->scsi_sense_length, &sshdr)) + if (sshdr.sense_key == HARDWARE_ERROR && + (se_cmd->residual_count == 0 || + se_cmd->residual_count == se_cmd->data_length)) { + rc = NO_SENSE; + cmd->flags |= CMD_FAST_FAIL; + } + + return rc; +} + +/** + * srp_build_response() - Build an SRP response buffer + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command for which to send the response + * @len_p: Where to return the length of the IU response sent. This + * is needed to construct the CRQ response. + * + * Build the SRP response buffer and copy it to the client's memory space. + */ +static long srp_build_response(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, uint *len_p) +{ + struct iu_entry *iue = cmd->iue; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct srp_rsp *rsp; + uint len; + u32 rsp_code; + char *data; + u32 *tsk_status; + long rc = ADAPT_SUCCESS; + + spin_lock_bh(&vscsi->intr_lock); + + rsp = &vio_iu(iue)->srp.rsp; + len = sizeof(*rsp); + memset(rsp, 0, len); + data = rsp->data; + + rsp->opcode = SRP_RSP; + + if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING) + rsp->req_lim_delta = cpu_to_be32(vscsi->credit); + else + rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); + rsp->tag = cmd->rsp.tag; + rsp->flags = 0; + + if (cmd->type == SCSI_CDB) { + rsp->status = ibmvscsis_fast_fail(vscsi, cmd); + if (rsp->status) { + pr_debug("build_resp: cmd %p, scsi status %d\n", cmd, + (int)rsp->status); + ibmvscsis_determine_resid(se_cmd, rsp); + if (se_cmd->scsi_sense_length && se_cmd->sense_buffer) { + rsp->sense_data_len = + cpu_to_be32(se_cmd->scsi_sense_length); + rsp->flags |= SRP_RSP_FLAG_SNSVALID; + len += se_cmd->scsi_sense_length; + memcpy(data, se_cmd->sense_buffer, + se_cmd->scsi_sense_length); + } + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + } else if (cmd->flags & CMD_FAST_FAIL) { + pr_debug("build_resp: cmd %p, fast fail\n", cmd); + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + } else { + rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >> + SCSOLNT_RESP_SHIFT; + } + } else { + /* this is task management */ + rsp->status = 0; + rsp->resp_data_len = cpu_to_be32(4); + rsp->flags |= SRP_RSP_FLAG_RSPVALID; + + switch (se_cmd->se_tmr_req->response) { + case TMR_FUNCTION_COMPLETE: + case TMR_TASK_DOES_NOT_EXIST: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE; + rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >> + SCSOLNT_RESP_SHIFT; + break; + case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: + case TMR_LUN_DOES_NOT_EXIST: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED; + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + break; + case TMR_FUNCTION_FAILED: + case TMR_FUNCTION_REJECTED: + default: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_FAILED; + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + break; + } + + tsk_status = (u32 *)data; + *tsk_status = cpu_to_be32(rsp_code); + data = (char *)(tsk_status + 1); + len += 4; + } + + dma_wmb(); + rc = h_copy_rdma(len, vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + vscsi->credit = 0; + *len_p = len; + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + vscsi->flags |= RESPONSE_Q_DOWN | CLIENT_FAILED; + + dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld, flags 0x%x, state 0x%hx\n", + rc, vscsi->flags, vscsi->state); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld\n", + rc); + break; + } + + spin_unlock_bh(&vscsi->intr_lock); + + return rc; +} + +static int ibmvscsis_rdma(struct ibmvscsis_cmd *cmd, struct scatterlist *sg, + int nsg, struct srp_direct_buf *md, int nmd, + enum dma_data_direction dir, unsigned int bytes) +{ + struct iu_entry *iue = cmd->iue; + struct srp_target *target = iue->target; + struct scsi_info *vscsi = target->ldata; + struct scatterlist *sgp; + dma_addr_t client_ioba, server_ioba; + ulong buf_len; + ulong client_len, server_len; + int md_idx; + long tx_len; + long rc = 0; + + pr_debug("rdma: dir %d, bytes 0x%x\n", dir, bytes); + + if (bytes == 0) + return 0; + + sgp = sg; + client_len = 0; + server_len = 0; + md_idx = 0; + tx_len = bytes; + + do { + if (client_len == 0) { + if (md_idx >= nmd) { + dev_err(&vscsi->dev, "rdma: ran out of client memory descriptors\n"); + rc = -EIO; + break; + } + client_ioba = be64_to_cpu(md[md_idx].va); + client_len = be32_to_cpu(md[md_idx].len); + } + if (server_len == 0) { + if (!sgp) { + dev_err(&vscsi->dev, "rdma: ran out of scatter/gather list\n"); + rc = -EIO; + break; + } + server_ioba = sg_dma_address(sgp); + server_len = sg_dma_len(sgp); + } + + buf_len = tx_len; + + if (buf_len > client_len) + buf_len = client_len; + + if (buf_len > server_len) + buf_len = server_len; + + if (buf_len > max_vdma_size) + buf_len = max_vdma_size; + + if (dir == DMA_TO_DEVICE) { + /* read from client */ + rc = h_copy_rdma(buf_len, + vscsi->dds.window[REMOTE].liobn, + client_ioba, + vscsi->dds.window[LOCAL].liobn, + server_ioba); + } else { + /* write to client */ + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + + if (!READ_CMD(srp->cdb)) + print_hex_dump_bytes(" data:", DUMP_PREFIX_NONE, + sg_virt(sgp), buf_len); + /* The h_copy_rdma will cause phyp, running in another + * partition, to read memory, so we need to make sure + * the data has been written out, hence these syncs. + */ + /* ensure that everything is in memory */ + isync(); + /* ensure that memory has been made visible */ + dma_wmb(); + rc = h_copy_rdma(buf_len, + vscsi->dds.window[LOCAL].liobn, + server_ioba, + vscsi->dds.window[REMOTE].liobn, + client_ioba); + } + switch (rc) { + case H_SUCCESS: + break; + case H_PERMISSION: + case H_SOURCE_PARM: + case H_DEST_PARM: + if (connection_broken(vscsi)) { + spin_lock_bh(&vscsi->intr_lock); + vscsi->flags |= + (RESPONSE_Q_DOWN | CLIENT_FAILED); + spin_unlock_bh(&vscsi->intr_lock); + } + dev_err(&vscsi->dev, "rdma: h_copy_rdma failed, rc %ld\n", + rc); + break; + + default: + dev_err(&vscsi->dev, "rdma: unknown error %ld from h_copy_rdma\n", + rc); + break; + } + + if (!rc) { + tx_len -= buf_len; + if (tx_len) { + client_len -= buf_len; + if (client_len == 0) + md_idx++; + else + client_ioba += buf_len; + + server_len -= buf_len; + if (server_len == 0) + sgp = sg_next(sgp); + else + server_ioba += buf_len; + } else { + break; + } + } + } while (!rc); + + return rc; +} + +/** + * ibmvscsis_handle_crq() - Handle CRQ + * @data: Pointer to our adapter structure + * + * Read the command elements from the command queue and copy the payloads + * associated with the command elements to local memory and execute the + * SRP requests. + * + * Note: this is an edge triggered interrupt. It can not be shared. + */ +static void ibmvscsis_handle_crq(unsigned long data) +{ + struct scsi_info *vscsi = (struct scsi_info *)data; + struct viosrp_crq *crq; + long rc; + bool ack = true; + volatile u8 valid; + + spin_lock_bh(&vscsi->intr_lock); + + pr_debug("got interrupt\n"); + + /* + * if we are in a path where we are waiting for all pending commands + * to complete because we received a transport event and anything in + * the command queue is for a new connection, do nothing + */ + if (TARGET_STOP(vscsi)) { + vio_enable_interrupts(vscsi->dma_dev); + + pr_debug("handle_crq, don't process: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + + while (valid) { + /* + * These are edege triggered interrupts. After dropping out of + * the while loop, the code must check for work since an + * interrupt could be lost, and an elment be left on the queue, + * hence the label. + */ +cmd_work: + vscsi->cmd_q.index = + (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask; + + if (!rc) { + rc = ibmvscsis_parse_command(vscsi, crq); + } else { + if ((uint)crq->valid == VALID_TRANS_EVENT) { + /* + * must service the transport layer events even + * in an error state, dont break out until all + * the consecutive transport events have been + * processed + */ + rc = ibmvscsis_trans_event(vscsi, crq); + } else if (vscsi->flags & TRANS_EVENT) { + /* + * if a tranport event has occurred leave + * everything but transport events on the queue + */ + pr_debug("handle_crq, ignoring\n"); + + /* + * need to decrement the queue index so we can + * look at the elment again + */ + if (vscsi->cmd_q.index) + vscsi->cmd_q.index -= 1; + else + /* + * index is at 0 it just wrapped. + * have it index last element in q + */ + vscsi->cmd_q.index = vscsi->cmd_q.mask; + break; + } + } + + crq->valid = INVALIDATE_CMD_RESP_EL; + + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + } + + if (!rc) { + if (ack) { + vio_enable_interrupts(vscsi->dma_dev); + ack = false; + pr_debug("handle_crq, reenabling interrupts\n"); + } + valid = crq->valid; + dma_rmb(); + if (valid) + goto cmd_work; + } else { + pr_debug("handle_crq, error: flags 0x%x, state 0x%hx, crq index 0x%x\n", + vscsi->flags, vscsi->state, vscsi->cmd_q.index); + } + + pr_debug("Leaving handle_crq: schedule_q empty %d, flags 0x%x, state 0x%hx\n", + (int)list_empty(&vscsi->schedule_q), vscsi->flags, + vscsi->state); + + spin_unlock_bh(&vscsi->intr_lock); +} + +static int ibmvscsis_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + struct scsi_info *vscsi; + int rc = 0; + long hrc = 0; + char wq_name[24]; + + vscsi = kzalloc(sizeof(*vscsi), GFP_KERNEL); + if (!vscsi) { + rc = -ENOMEM; + pr_err("probe: allocation of adapter failed\n"); + return rc; + } + + vscsi->dma_dev = vdev; + vscsi->dev = vdev->dev; + INIT_LIST_HEAD(&vscsi->schedule_q); + INIT_LIST_HEAD(&vscsi->waiting_rsp); + INIT_LIST_HEAD(&vscsi->active_q); + + snprintf(vscsi->tport.tport_name, 256, "%s", dev_name(&vdev->dev)); + + pr_debug("probe tport_name: %s\n", vscsi->tport.tport_name); + + rc = read_dma_window(vscsi); + if (rc) + goto free_adapter; + pr_debug("Probe: liobn 0x%x, riobn 0x%x\n", + vscsi->dds.window[LOCAL].liobn, + vscsi->dds.window[REMOTE].liobn); + + strcpy(vscsi->eye, "VSCSI "); + strncat(vscsi->eye, vdev->name, MAX_EYE); + + vscsi->dds.unit_id = vdev->unit_address; + + spin_lock_bh(&ibmvscsis_dev_lock); + list_add_tail(&vscsi->list, &ibmvscsis_dev_list); + spin_unlock_bh(&ibmvscsis_dev_lock); + + /* + * TBD: How do we determine # of cmds to request? Do we know how + * many "children" we have? + */ + vscsi->request_limit = INITIAL_SRP_LIMIT; + rc = srp_target_alloc(&vscsi->target, &vdev->dev, vscsi->request_limit, + SRP_MAX_IU_LEN); + if (rc) + goto rem_list; + + vscsi->target.ldata = vscsi; + + rc = ibmvscsis_alloc_cmds(vscsi, vscsi->request_limit); + if (rc) { + dev_err(&vscsi->dev, "alloc_cmds failed, rc %d, num %d\n", + rc, vscsi->request_limit); + goto free_target; + } + + /* + * Note: the lock is used in freeing timers, so must initialize + * first so that ordering in case of error is correct. + */ + spin_lock_init(&vscsi->intr_lock); + + rc = ibmvscsis_alloctimer(vscsi); + if (rc) { + dev_err(&vscsi->dev, "probe: alloctimer failed, rc %d\n", rc); + goto free_cmds; + } + + rc = ibmvscsis_create_command_q(vscsi, 256); + if (rc) { + dev_err(&vscsi->dev, "probe: create_command_q failed, rc %d\n", + rc); + goto free_timer; + } + + vscsi->map_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!vscsi->map_buf) { + rc = -ENOMEM; + dev_err(&vscsi->dev, "probe: allocating cmd buffer failed\n"); + goto destroy_queue; + } + + vscsi->map_ioba = dma_map_single(&vdev->dev, vscsi->map_buf, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vdev->dev, vscsi->map_ioba)) { + dev_err(&vscsi->dev, "probe: error mapping command buffer\n"); + goto free_buf; + } + + hrc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, + (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, + 0); + if (hrc == H_SUCCESS) + vscsi->client_data.partition_number = + be64_to_cpu(*(u64 *)vscsi->map_buf); + /* + * We expect the VIOCTL to fail if we're configured as "any + * client can connect" and the client isn't activated yet. + * We'll make the call again when he sends an init msg. + */ + pr_debug("probe hrc %ld, client partition num %d\n", + hrc, vscsi->client_data.partition_number); + + tasklet_init(&vscsi->work_task, ibmvscsis_handle_crq, + (unsigned long)vscsi); + + init_completion(&vscsi->wait_idle); + + snprintf(wq_name, 24, "ibmvscsis%s", dev_name(&vdev->dev)); + vscsi->work_q = create_workqueue(wq_name); + if (!vscsi->work_q) { + rc = -ENOMEM; + dev_err(&vscsi->dev, "create_workqueue failed\n"); + goto unmap_buf; + } + + rc = request_irq(vdev->irq, ibmvscsis_interrupt, 0, "ibmvscsis", vscsi); + if (rc) { + rc = -EPERM; + dev_err(&vscsi->dev, "probe: request_irq failed, rc %d\n", rc); + goto destroy_WQ; + } + + spin_lock_bh(&vscsi->intr_lock); + vio_enable_interrupts(vdev); + if (rc) { + dev_err(&vscsi->dev, "enabling interrupts failed, rc %d\n", rc); + rc = -ENODEV; + spin_unlock_bh(&vscsi->intr_lock); + goto free_irq; + } + + if (ibmvscsis_check_q(vscsi)) { + rc = ERROR; + dev_err(&vscsi->dev, "probe: check_q failed, rc %d\n", rc); + spin_unlock_bh(&vscsi->intr_lock); + goto disable_interrupt; + } + spin_unlock_bh(&vscsi->intr_lock); + + dev_set_drvdata(&vdev->dev, vscsi); + + return 0; + +disable_interrupt: + vio_disable_interrupts(vdev); +free_irq: + free_irq(vdev->irq, vscsi); +destroy_WQ: + destroy_workqueue(vscsi->work_q); +unmap_buf: + dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE, + DMA_BIDIRECTIONAL); +free_buf: + kfree(vscsi->map_buf); +destroy_queue: + tasklet_kill(&vscsi->work_task); + ibmvscsis_unregister_command_q(vscsi); + ibmvscsis_destroy_command_q(vscsi); +free_timer: + ibmvscsis_freetimer(vscsi); +free_cmds: + ibmvscsis_free_cmds(vscsi); +free_target: + srp_target_free(&vscsi->target); +rem_list: + spin_lock_bh(&ibmvscsis_dev_lock); + list_del(&vscsi->list); + spin_unlock_bh(&ibmvscsis_dev_lock); +free_adapter: + kfree(vscsi); + + return rc; +} + +static int ibmvscsis_remove(struct vio_dev *vdev) +{ + struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev); + + pr_debug("remove (%s)\n", dev_name(&vscsi->dma_dev->dev)); + + /* + * TBD: Need to handle if there are commands on the waiting_rsp q + * Actually, can there still be cmds outstanding to tcm? + */ + + vio_disable_interrupts(vdev); + free_irq(vdev->irq, vscsi); + destroy_workqueue(vscsi->work_q); + dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE, + DMA_BIDIRECTIONAL); + kfree(vscsi->map_buf); + tasklet_kill(&vscsi->work_task); + ibmvscsis_unregister_command_q(vscsi); + ibmvscsis_destroy_command_q(vscsi); + ibmvscsis_freetimer(vscsi); + ibmvscsis_free_cmds(vscsi); + srp_target_free(&vscsi->target); + spin_lock_bh(&ibmvscsis_dev_lock); + list_del(&vscsi->list); + spin_unlock_bh(&ibmvscsis_dev_lock); + kfree(vscsi); + + return 0; +} + +static ssize_t system_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", system_id); +} + +static ssize_t partition_number_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%x\n", partition_number); +} + +static ssize_t unit_address_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_info *vscsi = container_of(dev, struct scsi_info, dev); + + return snprintf(buf, PAGE_SIZE, "%x\n", vscsi->dma_dev->unit_address); +} + +static int ibmvscsis_get_system_info(void) +{ + struct device_node *rootdn, *vdevdn; + const char *id, *model, *name; + const uint *num; + + rootdn = of_find_node_by_path("/"); + if (!rootdn) + return -ENOENT; + + model = of_get_property(rootdn, "model", NULL); + id = of_get_property(rootdn, "system-id", NULL); + if (model && id) + snprintf(system_id, sizeof(system_id), "%s-%s", model, id); + + name = of_get_property(rootdn, "ibm,partition-name", NULL); + if (name) + strncpy(partition_name, name, sizeof(partition_name)); + + num = of_get_property(rootdn, "ibm,partition-no", NULL); + if (num) + partition_number = *num; + + of_node_put(rootdn); + + vdevdn = of_find_node_by_path("/vdevice"); + if (vdevdn) { + const uint *mvds; + + mvds = of_get_property(vdevdn, "ibm,max-virtual-dma-size", + NULL); + if (mvds) + max_vdma_size = *mvds; + of_node_put(vdevdn); + } + + return 0; +} + +static char *ibmvscsis_get_fabric_name(void) +{ + return "ibmvscsis"; +} + +static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = + container_of(se_tpg, struct ibmvscsis_tport, se_tpg); + + return tport->tport_name; +} + +static u16 ibmvscsis_get_tag(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = + container_of(se_tpg, struct ibmvscsis_tport, se_tpg); + + return tport->tport_tpgt; +} + +static u32 ibmvscsis_get_default_depth(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_true(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_false(struct se_portal_group *se_tpg) +{ + return 0; +} + +static u32 ibmvscsis_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_stop_free(struct se_cmd *se_cmd) +{ + return target_put_sess_cmd(se_cmd); +} + +static void ibmvscsis_release_cmd(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + + pr_debug("release_cmd %p, flags %d\n", se_cmd, cmd->flags); + + spin_lock_bh(&vscsi->intr_lock); + /* Remove from active_q */ + list_del(&cmd->list); + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + spin_unlock_bh(&vscsi->intr_lock); +} + +static u32 ibmvscsis_sess_get_index(struct se_session *se_sess) +{ + return 0; +} + +static int ibmvscsis_write_pending(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct iu_entry *iue = cmd->iue; + int rc; + + pr_debug("write_pending, se_cmd %p, length 0x%x\n", + se_cmd, se_cmd->data_length); + + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, + 1, 1); + if (rc) { + pr_err("srp_transfer_data() failed: %d\n", rc); + return -EAGAIN; + } + /* + * We now tell TCM to add this WRITE CDB directly into the TCM storage + * object execution queue. + */ + target_execute_cmd(se_cmd); + return 0; +} + +static int ibmvscsis_write_pending_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static void ibmvscsis_set_default_node_attrs(struct se_node_acl *nacl) +{ +} + +static int ibmvscsis_get_cmd_state(struct se_cmd *se_cmd) +{ + return 0; +} + +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct iu_entry *iue = cmd->iue; + struct scsi_info *vscsi = cmd->adapter; + char *sd; + uint len = 0; + int rc; + + pr_debug("queue_data_in, se_cmd %p, length 0x%x\n", + se_cmd, se_cmd->data_length); + + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, 1, + 1); + if (rc) { + pr_err("srp_transfer_data failed: %d\n", rc); + sd = se_cmd->sense_buffer; + se_cmd->scsi_sense_length = 18; + memset(se_cmd->sense_buffer, 0, se_cmd->scsi_sense_length); + /* Logical Unit Communication Time-out asc/ascq = 0x0801 */ + scsi_build_sense_buffer(0, se_cmd->sense_buffer, MEDIUM_ERROR, + 0x08, 0x01); + } + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; + + return 0; +} + +static int ibmvscsis_queue_status(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + uint len; + + pr_debug("queue_status %p\n", se_cmd); + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; + + return 0; +} + +static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + uint len; + + pr_debug("queue_tm_rsp %p, status %d\n", + se_cmd, (int)se_cmd->se_tmr_req->response); + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; +} + +static void ibmvscsis_aborted_task(struct se_cmd *se_cmd) +{ + /* TBD: What (if anything) should we do here? */ + pr_debug("ibmvscsis_aborted_task %p\n", se_cmd); +} + +static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct ibmvscsis_tport *tport; + + tport = ibmvscsis_lookup_port(name); + if (tport) { + tport->tport_proto_id = SCSI_PROTOCOL_SRP; + pr_debug("make_tport(%s), pointer:%p, tport_id:%x\n", + name, tport, tport->tport_proto_id); + return &tport->tport_wwn; + } + + return ERR_PTR(-EINVAL); +} + +static void ibmvscsis_drop_tport(struct se_wwn *wwn) +{ + struct ibmvscsis_tport *tport = container_of(wwn, + struct ibmvscsis_tport, + tport_wwn); + + pr_debug("drop_tport(%s)\n", + config_item_name(&tport->tport_wwn.wwn_group.cg_item)); +} + +static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn, + struct config_group *group, + const char *name) +{ + struct ibmvscsis_tport *tport = + container_of(wwn, struct ibmvscsis_tport, tport_wwn); + int rc; + + tport->releasing = false; + + rc = core_tpg_register(&tport->tport_wwn, &tport->se_tpg, + tport->tport_proto_id); + if (rc) + return ERR_PTR(rc); + + return &tport->se_tpg; +} + +static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + + tport->releasing = true; + tport->enabled = false; + + /* + * Release the virtual I_T Nexus for this ibmvscsis TPG + */ + ibmvscsis_drop_nexus(tport); + /* + * Deregister the se_tpg from TCM.. + */ + core_tpg_deregister(se_tpg); +} + +static ssize_t ibmvscsis_wwn_version_show(struct config_item *item, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "%s\n", IBMVSCSIS_VERSION); +} +CONFIGFS_ATTR_RO(ibmvscsis_wwn_, version); + +static struct configfs_attribute *ibmvscsis_wwn_attrs[] = { + &ibmvscsis_wwn_attr_version, + NULL, +}; + +static ssize_t ibmvscsis_tpg_enable_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = to_tpg(item); + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + + return snprintf(page, PAGE_SIZE, "%d\n", (tport->enabled) ? 1 : 0); +} + +static ssize_t ibmvscsis_tpg_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = to_tpg(item); + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + struct scsi_info *vscsi = container_of(tport, struct scsi_info, tport); + unsigned long tmp; + int rc; + long lrc; + + rc = kstrtoul(page, 0, &tmp); + if (rc < 0) { + pr_err("Unable to extract srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if ((tmp != 0) && (tmp != 1)) { + pr_err("Illegal value for srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if (tmp) { + tport->enabled = true; + spin_lock_bh(&vscsi->intr_lock); + lrc = ibmvscsis_enable_change_state(vscsi); + if (lrc) + pr_err("enable_change_state failed, rc %ld state %d\n", + lrc, vscsi->state); + spin_unlock_bh(&vscsi->intr_lock); + } else { + tport->enabled = false; + } + + pr_debug("tpg_enable_store, state %d\n", vscsi->state); + + return count; +} +CONFIGFS_ATTR(ibmvscsis_tpg_, enable); + +static struct configfs_attribute *ibmvscsis_tpg_attrs[] = { + &ibmvscsis_tpg_attr_enable, + NULL, +}; + +static const struct target_core_fabric_ops ibmvscsis_ops = { + .module = THIS_MODULE, + .name = "ibmvscsis", + .get_fabric_name = ibmvscsis_get_fabric_name, + .tpg_get_wwn = ibmvscsis_get_fabric_wwn, + .tpg_get_tag = ibmvscsis_get_tag, + .tpg_get_default_depth = ibmvscsis_get_default_depth, + .tpg_check_demo_mode = ibmvscsis_check_true, + .tpg_check_demo_mode_cache = ibmvscsis_check_true, + .tpg_check_demo_mode_write_protect = ibmvscsis_check_false, + .tpg_check_prod_mode_write_protect = ibmvscsis_check_false, + .tpg_get_inst_index = ibmvscsis_tpg_get_inst_index, + .check_stop_free = ibmvscsis_check_stop_free, + .release_cmd = ibmvscsis_release_cmd, + .sess_get_index = ibmvscsis_sess_get_index, + .write_pending = ibmvscsis_write_pending, + .write_pending_status = ibmvscsis_write_pending_status, + .set_default_node_attributes = ibmvscsis_set_default_node_attrs, + .get_cmd_state = ibmvscsis_get_cmd_state, + .queue_data_in = ibmvscsis_queue_data_in, + .queue_status = ibmvscsis_queue_status, + .queue_tm_rsp = ibmvscsis_queue_tm_rsp, + .aborted_task = ibmvscsis_aborted_task, + /* + * Setup function pointers for logic in target_core_fabric_configfs.c + */ + .fabric_make_wwn = ibmvscsis_make_tport, + .fabric_drop_wwn = ibmvscsis_drop_tport, + .fabric_make_tpg = ibmvscsis_make_tpg, + .fabric_drop_tpg = ibmvscsis_drop_tpg, + + .tfc_wwn_attrs = ibmvscsis_wwn_attrs, + .tfc_tpg_base_attrs = ibmvscsis_tpg_attrs, +}; + +static void ibmvscsis_dev_release(struct device *dev) {}; + +static struct class_attribute ibmvscsis_class_attrs[] = { + __ATTR_NULL, +}; + +static struct device_attribute dev_attr_system_id = + __ATTR(system_id, S_IRUGO, system_id_show, NULL); + +static struct device_attribute dev_attr_partition_number = + __ATTR(partition_number, S_IRUGO, partition_number_show, NULL); + +static struct device_attribute dev_attr_unit_address = + __ATTR(unit_address, S_IRUGO, unit_address_show, NULL); + +static struct attribute *ibmvscsis_dev_attrs[] = { + &dev_attr_system_id.attr, + &dev_attr_partition_number.attr, + &dev_attr_unit_address.attr, +}; +ATTRIBUTE_GROUPS(ibmvscsis_dev); + +static struct class ibmvscsis_class = { + .name = "ibmvscsis", + .dev_release = ibmvscsis_dev_release, + .class_attrs = ibmvscsis_class_attrs, + .dev_groups = ibmvscsis_dev_groups, +}; + +static struct vio_device_id ibmvscsis_device_table[] = { + { "v-scsi-host", "IBM,v-scsi-host" }, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvscsis_device_table); + +static struct vio_driver ibmvscsis_driver = { + .name = "ibmvscsis", + .id_table = ibmvscsis_device_table, + .probe = ibmvscsis_probe, + .remove = ibmvscsis_remove, +}; + +/* + * ibmvscsis_init() - Kernel Module initialization + * + * Note: vio_register_driver() registers callback functions, and at least one + * of those callback functions calls TCM - Linux IO Target Subsystem, thus + * the SCSI Target template must be registered before vio_register_driver() + * is called. + */ +static int __init ibmvscsis_init(void) +{ + int rc = 0; + + rc = ibmvscsis_get_system_info(); + if (rc) { + pr_err("rc %d from get_system_info\n", rc); + goto out; + } + + rc = class_register(&ibmvscsis_class); + if (rc) { + pr_err("failed class register\n"); + goto out; + } + + rc = target_register_template(&ibmvscsis_ops); + if (rc) { + pr_err("rc %d from target_register_template\n", rc); + goto unregister_class; + } + + rc = vio_register_driver(&ibmvscsis_driver); + if (rc) { + pr_err("rc %d from vio_register_driver\n", rc); + goto unregister_target; + } + + return 0; + +unregister_target: + target_unregister_template(&ibmvscsis_ops); +unregister_class: + class_unregister(&ibmvscsis_class); +out: + return rc; +} + +static void __exit ibmvscsis_exit(void) +{ + pr_info("Unregister IBM virtual SCSI host driver\n"); + vio_unregister_driver(&ibmvscsis_driver); + target_unregister_template(&ibmvscsis_ops); + class_unregister(&ibmvscsis_class); +} + +MODULE_DESCRIPTION("IBMVSCSIS fabric driver"); +MODULE_AUTHOR("Bryant G. Ly and Michael Cyr"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IBMVSCSIS_VERSION); +module_init(ibmvscsis_init); +module_exit(ibmvscsis_exit); diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h new file mode 100644 index 000000000000..981a0c992b6c --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -0,0 +1,346 @@ +/******************************************************************************* + * IBM Virtual SCSI Target Driver + * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp. + * Santiago Leon (santil@us.ibm.com) IBM Corp. + * Linda Xie (lxie@us.ibm.com) IBM Corp. + * + * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org> + * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp. + * + * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com> + * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ****************************************************************************/ + +#ifndef __H_IBMVSCSI_TGT +#define __H_IBMVSCSI_TGT + +#include "libsrp.h" + +#define SYS_ID_NAME_LEN 64 +#define PARTITION_NAMELEN 96 +#define IBMVSCSIS_NAMELEN 32 + +#define MSG_HI 0 +#define MSG_LOW 1 + +#define MAX_CMD_Q_PAGES 4 +#define CRQ_PER_PAGE (PAGE_SIZE / sizeof(struct viosrp_crq)) +/* in terms of number of elements */ +#define DEFAULT_CMD_Q_SIZE CRQ_PER_PAGE +#define MAX_CMD_Q_SIZE (DEFAULT_CMD_Q_SIZE * MAX_CMD_Q_PAGES) + +#define SRP_VIOLATION 0x102 /* general error code */ + +/* + * SRP buffer formats defined as of 16.a supported by this driver. + */ +#define SUPPORTED_FORMATS ((SRP_DATA_DESC_DIRECT << 1) | \ + (SRP_DATA_DESC_INDIRECT << 1)) + +#define SCSI_LUN_ADDR_METHOD_FLAT 1 + +struct dma_window { + u32 liobn; /* Unique per vdevice */ + u64 tce_base; /* Physical location of the TCE table */ + u64 tce_size; /* Size of the TCE table in bytes */ +}; + +struct target_dds { + u64 unit_id; /* 64 bit will force alignment */ +#define NUM_DMA_WINDOWS 2 +#define LOCAL 0 +#define REMOTE 1 + struct dma_window window[NUM_DMA_WINDOWS]; + + /* root node property "ibm,partition-no" */ + uint partition_num; + char partition_name[PARTITION_NAMELEN]; +}; + +#define MAX_NUM_PORTS 1 +#define MAX_H_COPY_RDMA (128 * 1024) + +#define MAX_EYE 64 + +/* Return codes */ +#define ADAPT_SUCCESS 0L +/* choose error codes that do not conflict with PHYP */ +#define ERROR -40L + +struct format_code { + u8 reserved; + u8 buffers; +}; + +struct client_info { +#define SRP_VERSION "16.a" + char srp_version[8]; + /* root node property ibm,partition-name */ + char partition_name[PARTITION_NAMELEN]; + /* root node property ibm,partition-no */ + u32 partition_number; + /* initially 1 */ + u32 mad_version; + u32 os_type; +}; + +/* + * Changing this constant changes the number of seconds to wait before + * considering the client will never service its queue again. + */ +#define SECONDS_TO_CONSIDER_FAILED 30 +/* + * These constants set the polling period used to determine if the client + * has freed at least one element in the response queue. + */ +#define WAIT_SECONDS 1 +#define WAIT_NANO_SECONDS 5000 +#define MAX_TIMER_POPS ((1000000 / WAIT_NANO_SECONDS) * \ + SECONDS_TO_CONSIDER_FAILED) +/* + * general purpose timer control block + * which can be used for multiple functions + */ +struct timer_cb { + struct hrtimer timer; + /* + * how long has it been since the client + * serviced the queue. The variable is incrmented + * in the service_wait_q routine and cleared + * in send messages + */ + int timer_pops; + /* the timer is started */ + bool started; +}; + +struct cmd_queue { + /* kva */ + struct viosrp_crq *base_addr; + dma_addr_t crq_token; + /* used to maintain index */ + uint mask; + /* current element */ + uint index; + int size; +}; + +#define SCSOLNT_RESP_SHIFT 1 +#define UCSOLNT_RESP_SHIFT 2 + +#define SCSOLNT BIT(SCSOLNT_RESP_SHIFT) +#define UCSOLNT BIT(UCSOLNT_RESP_SHIFT) + +enum cmd_type { + SCSI_CDB = 0x01, + TASK_MANAGEMENT = 0x02, + /* MAD or addressed to port 0 */ + ADAPTER_MAD = 0x04, + UNSET_TYPE = 0x08, +}; + +struct iu_rsp { + u8 format; + u8 sol_not; + u16 len; + /* tag is just to help client identify cmd, so don't translate be/le */ + u64 tag; +}; + +struct ibmvscsis_cmd { + struct list_head list; + /* Used for TCM Core operations */ + struct se_cmd se_cmd; + struct iu_entry *iue; + struct iu_rsp rsp; + struct work_struct work; + struct scsi_info *adapter; + /* Sense buffer that will be mapped into outgoing status */ + unsigned char sense_buf[TRANSPORT_SENSE_BUFFER]; + u64 init_time; +#define CMD_FAST_FAIL BIT(0) + u32 flags; + char type; +}; + +struct ibmvscsis_nexus { + struct se_session *se_sess; +}; + +struct ibmvscsis_tport { + /* SCSI protocol the tport is providing */ + u8 tport_proto_id; + /* ASCII formatted WWPN for SRP Target port */ + char tport_name[IBMVSCSIS_NAMELEN]; + /* Returned by ibmvscsis_make_tport() */ + struct se_wwn tport_wwn; + /* Returned by ibmvscsis_make_tpg() */ + struct se_portal_group se_tpg; + /* ibmvscsis port target portal group tag for TCM */ + u16 tport_tpgt; + /* Pointer to TCM session for I_T Nexus */ + struct ibmvscsis_nexus *ibmv_nexus; + bool enabled; + bool releasing; +}; + +struct scsi_info { + struct list_head list; + char eye[MAX_EYE]; + + /* commands waiting for space on repsonse queue */ + struct list_head waiting_rsp; +#define NO_QUEUE 0x00 +#define WAIT_ENABLED 0X01 + /* driver has received an initialize command */ +#define PART_UP_WAIT_ENAB 0x02 +#define WAIT_CONNECTION 0x04 + /* have established a connection */ +#define CONNECTED 0x08 + /* at least one port is processing SRP IU */ +#define SRP_PROCESSING 0x10 + /* remove request received */ +#define UNCONFIGURING 0x20 + /* disconnect by letting adapter go idle, no error */ +#define WAIT_IDLE 0x40 + /* disconnecting to clear an error */ +#define ERR_DISCONNECT 0x80 + /* disconnect to clear error state, then come back up */ +#define ERR_DISCONNECT_RECONNECT 0x100 + /* disconnected after clearing an error */ +#define ERR_DISCONNECTED 0x200 + /* A series of errors caused unexpected errors */ +#define UNDEFINED 0x400 + u16 state; + int fast_fail; + struct target_dds dds; + char *cmd_pool; + /* list of free commands */ + struct list_head free_cmd; + /* command elements ready for scheduler */ + struct list_head schedule_q; + /* commands sent to TCM */ + struct list_head active_q; + caddr_t *map_buf; + /* ioba of map buffer */ + dma_addr_t map_ioba; + /* allowable number of outstanding SRP requests */ + int request_limit; + /* extra credit */ + int credit; + /* outstanding transactions against credit limit */ + int debit; + + /* allow only one outstanding mad request */ +#define PROCESSING_MAD 0x00002 + /* Waiting to go idle */ +#define WAIT_FOR_IDLE 0x00004 + /* H_REG_CRQ called */ +#define CRQ_CLOSED 0x00010 + /* detected that client has failed */ +#define CLIENT_FAILED 0x00040 + /* detected that transport event occurred */ +#define TRANS_EVENT 0x00080 + /* don't attempt to send anything to the client */ +#define RESPONSE_Q_DOWN 0x00100 + /* request made to schedule disconnect handler */ +#define SCHEDULE_DISCONNECT 0x00400 + /* disconnect handler is scheduled */ +#define DISCONNECT_SCHEDULED 0x00800 + u32 flags; + /* adapter lock */ + spinlock_t intr_lock; + /* information needed to manage command queue */ + struct cmd_queue cmd_q; + /* used in hcall to copy response back into srp buffer */ + u64 empty_iu_id; + /* used in crq, to tag what iu the response is for */ + u64 empty_iu_tag; + uint new_state; + /* control block for the response queue timer */ + struct timer_cb rsp_q_timer; + /* keep last client to enable proper accounting */ + struct client_info client_data; + /* what can this client do */ + u32 client_cap; + /* + * The following two fields capture state and flag changes that + * can occur when the lock is given up. In the orginal design, + * the lock was held during calls into phyp; + * however, phyp did not meet PAPR architecture. This is + * a work around. + */ + u16 phyp_acr_state; + u32 phyp_acr_flags; + + struct workqueue_struct *work_q; + struct completion wait_idle; + struct device dev; + struct vio_dev *dma_dev; + struct srp_target target; + struct ibmvscsis_tport tport; + struct tasklet_struct work_task; + struct work_struct proc_work; +}; + +/* + * Provide a constant that allows software to detect the adapter is + * disconnecting from the client from one of several states. + */ +#define IS_DISCONNECTING (UNCONFIGURING | ERR_DISCONNECT_RECONNECT | \ + ERR_DISCONNECT) + +/* + * Provide a constant that can be used with interrupt handling that + * essentially lets the interrupt handler know that all requests should + * be thrown out, + */ +#define DONT_PROCESS_STATE (IS_DISCONNECTING | UNDEFINED | \ + ERR_DISCONNECTED | WAIT_IDLE) + +/* + * If any of these flag bits are set then do not allow the interrupt + * handler to schedule the off level handler. + */ +#define BLOCK (DISCONNECT_SCHEDULED) + +/* State and transition events that stop the interrupt handler */ +#define TARGET_STOP(VSCSI) (long)(((VSCSI)->state & DONT_PROCESS_STATE) | \ + ((VSCSI)->flags & BLOCK)) + +/* flag bit that are not reset during disconnect */ +#define PRESERVE_FLAG_FIELDS 0 + +#define vio_iu(IUE) ((union viosrp_iu *)((IUE)->sbuf->buf)) + +#define READ_CMD(cdb) (((cdb)[0] & 0x1F) == 8) +#define WRITE_CMD(cdb) (((cdb)[0] & 0x1F) == 0xA) + +#ifndef H_GET_PARTNER_INFO +#define H_GET_PARTNER_INFO 0x0000000000000008LL +#endif + +#define h_copy_rdma(l, sa, sb, da, db) \ + plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db) +#define h_vioctl(u, o, a, u1, u2, u3, u4) \ + plpar_hcall_norets(H_VIOCTL, u, o, a, u1, u2) +#define h_reg_crq(ua, tok, sz) \ + plpar_hcall_norets(H_REG_CRQ, ua, tok, sz) +#define h_free_crq(ua) \ + plpar_hcall_norets(H_FREE_CRQ, ua) +#define h_send_crq(ua, d1, d2) \ + plpar_hcall_norets(H_SEND_CRQ, ua, d1, d2) + +#endif diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.c b/drivers/scsi/ibmvscsi_tgt/libsrp.c new file mode 100644 index 000000000000..5a4cc28ca5ff --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/libsrp.c @@ -0,0 +1,427 @@ +/******************************************************************************* + * SCSI RDMA Protocol lib functions + * + * Copyright (C) 2006 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ***********************************************************************/ + +#define pr_fmt(fmt) "libsrp: " fmt + +#include <linux/printk.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/kfifo.h> +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <scsi/srp.h> +#include <target/target_core_base.h> +#include "libsrp.h" +#include "ibmvscsi_tgt.h" + +static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, + struct srp_buf **ring) +{ + struct iu_entry *iue; + int i; + + q->pool = kcalloc(max, sizeof(struct iu_entry *), GFP_KERNEL); + if (!q->pool) + return -ENOMEM; + q->items = kcalloc(max, sizeof(struct iu_entry), GFP_KERNEL); + if (!q->items) + goto free_pool; + + spin_lock_init(&q->lock); + kfifo_init(&q->queue, (void *)q->pool, max * sizeof(void *)); + + for (i = 0, iue = q->items; i < max; i++) { + kfifo_in(&q->queue, (void *)&iue, sizeof(void *)); + iue->sbuf = ring[i]; + iue++; + } + return 0; + +free_pool: + kfree(q->pool); + return -ENOMEM; +} + +static void srp_iu_pool_free(struct srp_queue *q) +{ + kfree(q->items); + kfree(q->pool); +} + +static struct srp_buf **srp_ring_alloc(struct device *dev, + size_t max, size_t size) +{ + struct srp_buf **ring; + int i; + + ring = kcalloc(max, sizeof(struct srp_buf *), GFP_KERNEL); + if (!ring) + return NULL; + + for (i = 0; i < max; i++) { + ring[i] = kzalloc(sizeof(*ring[i]), GFP_KERNEL); + if (!ring[i]) + goto out; + ring[i]->buf = dma_alloc_coherent(dev, size, &ring[i]->dma, + GFP_KERNEL); + if (!ring[i]->buf) + goto out; + } + return ring; + +out: + for (i = 0; i < max && ring[i]; i++) { + if (ring[i]->buf) { + dma_free_coherent(dev, size, ring[i]->buf, + ring[i]->dma); + } + kfree(ring[i]); + } + kfree(ring); + + return NULL; +} + +static void srp_ring_free(struct device *dev, struct srp_buf **ring, + size_t max, size_t size) +{ + int i; + + for (i = 0; i < max; i++) { + dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma); + kfree(ring[i]); + } + kfree(ring); +} + +int srp_target_alloc(struct srp_target *target, struct device *dev, + size_t nr, size_t iu_size) +{ + int err; + + spin_lock_init(&target->lock); + + target->dev = dev; + + target->srp_iu_size = iu_size; + target->rx_ring_size = nr; + target->rx_ring = srp_ring_alloc(target->dev, nr, iu_size); + if (!target->rx_ring) + return -ENOMEM; + err = srp_iu_pool_alloc(&target->iu_queue, nr, target->rx_ring); + if (err) + goto free_ring; + + dev_set_drvdata(target->dev, target); + return 0; + +free_ring: + srp_ring_free(target->dev, target->rx_ring, nr, iu_size); + return -ENOMEM; +} + +void srp_target_free(struct srp_target *target) +{ + dev_set_drvdata(target->dev, NULL); + srp_ring_free(target->dev, target->rx_ring, target->rx_ring_size, + target->srp_iu_size); + srp_iu_pool_free(&target->iu_queue); +} + +struct iu_entry *srp_iu_get(struct srp_target *target) +{ + struct iu_entry *iue = NULL; + + if (kfifo_out_locked(&target->iu_queue.queue, (void *)&iue, + sizeof(void *), + &target->iu_queue.lock) != sizeof(void *)) { + WARN_ONCE(1, "unexpected fifo state"); + return NULL; + } + if (!iue) + return iue; + iue->target = target; + iue->flags = 0; + return iue; +} + +void srp_iu_put(struct iu_entry *iue) +{ + kfifo_in_locked(&iue->target->iu_queue.queue, (void *)&iue, + sizeof(void *), &iue->target->iu_queue.lock); +} + +static int srp_direct_data(struct ibmvscsis_cmd *cmd, struct srp_direct_buf *md, + enum dma_data_direction dir, srp_rdma_t rdma_io, + int dma_map, int ext_desc) +{ + struct iu_entry *iue = NULL; + struct scatterlist *sg = NULL; + int err, nsg = 0, len; + + if (dma_map) { + iue = cmd->iue; + sg = cmd->se_cmd.t_data_sg; + nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents, + DMA_BIDIRECTIONAL); + if (!nsg) { + pr_err("fail to map %p %d\n", iue, + cmd->se_cmd.t_data_nents); + return 0; + } + len = min(cmd->se_cmd.data_length, be32_to_cpu(md->len)); + } else { + len = be32_to_cpu(md->len); + } + + err = rdma_io(cmd, sg, nsg, md, 1, dir, len); + + if (dma_map) + dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL); + + return err; +} + +static int srp_indirect_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd, + struct srp_indirect_buf *id, + enum dma_data_direction dir, srp_rdma_t rdma_io, + int dma_map, int ext_desc) +{ + struct iu_entry *iue = NULL; + struct srp_direct_buf *md = NULL; + struct scatterlist dummy, *sg = NULL; + dma_addr_t token = 0; + int err = 0; + int nmd, nsg = 0, len; + + if (dma_map || ext_desc) { + iue = cmd->iue; + sg = cmd->se_cmd.t_data_sg; + } + + nmd = be32_to_cpu(id->table_desc.len) / sizeof(struct srp_direct_buf); + + if ((dir == DMA_FROM_DEVICE && nmd == srp_cmd->data_in_desc_cnt) || + (dir == DMA_TO_DEVICE && nmd == srp_cmd->data_out_desc_cnt)) { + md = &id->desc_list[0]; + goto rdma; + } + + if (ext_desc && dma_map) { + md = dma_alloc_coherent(iue->target->dev, + be32_to_cpu(id->table_desc.len), + &token, GFP_KERNEL); + if (!md) { + pr_err("Can't get dma memory %u\n", + be32_to_cpu(id->table_desc.len)); + return -ENOMEM; + } + + sg_init_one(&dummy, md, be32_to_cpu(id->table_desc.len)); + sg_dma_address(&dummy) = token; + sg_dma_len(&dummy) = be32_to_cpu(id->table_desc.len); + err = rdma_io(cmd, &dummy, 1, &id->table_desc, 1, DMA_TO_DEVICE, + be32_to_cpu(id->table_desc.len)); + if (err) { + pr_err("Error copying indirect table %d\n", err); + goto free_mem; + } + } else { + pr_err("This command uses external indirect buffer\n"); + return -EINVAL; + } + +rdma: + if (dma_map) { + nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents, + DMA_BIDIRECTIONAL); + if (!nsg) { + pr_err("fail to map %p %d\n", iue, + cmd->se_cmd.t_data_nents); + err = -EIO; + goto free_mem; + } + len = min(cmd->se_cmd.data_length, be32_to_cpu(id->len)); + } else { + len = be32_to_cpu(id->len); + } + + err = rdma_io(cmd, sg, nsg, md, nmd, dir, len); + + if (dma_map) + dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL); + +free_mem: + if (token && dma_map) { + dma_free_coherent(iue->target->dev, + be32_to_cpu(id->table_desc.len), md, token); + } + return err; +} + +static int data_out_desc_size(struct srp_cmd *cmd) +{ + int size = 0; + u8 fmt = cmd->buf_fmt >> 4; + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + size = sizeof(struct srp_direct_buf); + break; + case SRP_DATA_DESC_INDIRECT: + size = sizeof(struct srp_indirect_buf) + + sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt; + break; + default: + pr_err("client error. Invalid data_out_format %x\n", fmt); + break; + } + return size; +} + +/* + * TODO: this can be called multiple times for a single command if it + * has very long data. + */ +int srp_transfer_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd, + srp_rdma_t rdma_io, int dma_map, int ext_desc) +{ + struct srp_direct_buf *md; + struct srp_indirect_buf *id; + enum dma_data_direction dir; + int offset, err = 0; + u8 format; + + if (!cmd->se_cmd.t_data_nents) + return 0; + + offset = srp_cmd->add_cdb_len & ~3; + + dir = srp_cmd_direction(srp_cmd); + if (dir == DMA_FROM_DEVICE) + offset += data_out_desc_size(srp_cmd); + + if (dir == DMA_TO_DEVICE) + format = srp_cmd->buf_fmt >> 4; + else + format = srp_cmd->buf_fmt & ((1U << 4) - 1); + + switch (format) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + md = (struct srp_direct_buf *)(srp_cmd->add_data + offset); + err = srp_direct_data(cmd, md, dir, rdma_io, dma_map, ext_desc); + break; + case SRP_DATA_DESC_INDIRECT: + id = (struct srp_indirect_buf *)(srp_cmd->add_data + offset); + err = srp_indirect_data(cmd, srp_cmd, id, dir, rdma_io, dma_map, + ext_desc); + break; + default: + pr_err("Unknown format %d %x\n", dir, format); + err = -EINVAL; + } + + return err; +} + +u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir) +{ + struct srp_direct_buf *md; + struct srp_indirect_buf *id; + u64 len = 0; + uint offset = cmd->add_cdb_len & ~3; + u8 fmt; + + if (dir == DMA_TO_DEVICE) { + fmt = cmd->buf_fmt >> 4; + } else { + fmt = cmd->buf_fmt & ((1U << 4) - 1); + offset += data_out_desc_size(cmd); + } + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + md = (struct srp_direct_buf *)(cmd->add_data + offset); + len = be32_to_cpu(md->len); + break; + case SRP_DATA_DESC_INDIRECT: + id = (struct srp_indirect_buf *)(cmd->add_data + offset); + len = be32_to_cpu(id->len); + break; + default: + pr_err("invalid data format %x\n", fmt); + break; + } + return len; +} + +int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir, + u64 *data_len) +{ + struct srp_indirect_buf *idb; + struct srp_direct_buf *db; + uint add_cdb_offset; + int rc; + + /* + * The pointer computations below will only be compiled correctly + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check + * whether srp_cmd::add_data has been declared as a byte pointer. + */ + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) + && !__same_type(srp_cmd->add_data[0], (u8)0)); + + BUG_ON(!dir); + BUG_ON(!data_len); + + rc = 0; + *data_len = 0; + + *dir = DMA_NONE; + + if (srp_cmd->buf_fmt & 0xf) + *dir = DMA_FROM_DEVICE; + else if (srp_cmd->buf_fmt >> 4) + *dir = DMA_TO_DEVICE; + + add_cdb_offset = srp_cmd->add_cdb_len & ~3; + if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { + db = (struct srp_direct_buf *)(srp_cmd->add_data + + add_cdb_offset); + *data_len = be32_to_cpu(db->len); + } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { + idb = (struct srp_indirect_buf *)(srp_cmd->add_data + + add_cdb_offset); + + *data_len = be32_to_cpu(idb->len); + } + return rc; +} + +MODULE_DESCRIPTION("SCSI RDMA Protocol lib functions"); +MODULE_AUTHOR("FUJITA Tomonori"); +MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.h b/drivers/scsi/ibmvscsi_tgt/libsrp.h new file mode 100644 index 000000000000..4696f331453e --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/libsrp.h @@ -0,0 +1,123 @@ +#ifndef __LIBSRP_H__ +#define __LIBSRP_H__ + +#include <linux/list.h> +#include <linux/kfifo.h> +#include <scsi/srp.h> + +enum srp_valid { + INVALIDATE_CMD_RESP_EL = 0, + VALID_CMD_RESP_EL = 0x80, + VALID_INIT_MSG = 0xC0, + VALID_TRANS_EVENT = 0xFF +}; + +enum srp_format { + SRP_FORMAT = 1, + MAD_FORMAT = 2, + OS400_FORMAT = 3, + AIX_FORMAT = 4, + LINUX_FORMAT = 5, + MESSAGE_IN_CRQ = 6 +}; + +enum srp_init_msg { + INIT_MSG = 1, + INIT_COMPLETE_MSG = 2 +}; + +enum srp_trans_event { + UNUSED_FORMAT = 0, + PARTNER_FAILED = 1, + PARTNER_DEREGISTER = 2, + MIGRATED = 6 +}; + +enum srp_status { + HEADER_DESCRIPTOR = 0xF1, + PING = 0xF5, + PING_RESPONSE = 0xF6 +}; + +enum srp_mad_version { + MAD_VERSION_1 = 1 +}; + +enum srp_os_type { + OS400 = 1, + LINUX = 2, + AIX = 3, + OFW = 4 +}; + +enum srp_task_attributes { + SRP_SIMPLE_TASK = 0, + SRP_HEAD_TASK = 1, + SRP_ORDERED_TASK = 2, + SRP_ACA_TASK = 4 +}; + +enum { + SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE = 0, + SRP_REQUEST_FIELDS_INVALID = 2, + SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED = 4, + SRP_TASK_MANAGEMENT_FUNCTION_FAILED = 5 +}; + +struct srp_buf { + dma_addr_t dma; + void *buf; +}; + +struct srp_queue { + void *pool; + void *items; + struct kfifo queue; + spinlock_t lock; +}; + +struct srp_target { + struct device *dev; + + spinlock_t lock; + struct list_head cmd_queue; + + size_t srp_iu_size; + struct srp_queue iu_queue; + size_t rx_ring_size; + struct srp_buf **rx_ring; + + void *ldata; +}; + +struct iu_entry { + struct srp_target *target; + + struct list_head ilist; + dma_addr_t remote_token; + unsigned long flags; + + struct srp_buf *sbuf; + u16 iu_len; +}; + +struct ibmvscsis_cmd; + +typedef int (srp_rdma_t)(struct ibmvscsis_cmd *, struct scatterlist *, int, + struct srp_direct_buf *, int, + enum dma_data_direction, unsigned int); +int srp_target_alloc(struct srp_target *, struct device *, size_t, size_t); +void srp_target_free(struct srp_target *); +struct iu_entry *srp_iu_get(struct srp_target *); +void srp_iu_put(struct iu_entry *); +int srp_transfer_data(struct ibmvscsis_cmd *, struct srp_cmd *, + srp_rdma_t, int, int); +u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir); +int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir, + u64 *data_len); +static inline int srp_cmd_direction(struct srp_cmd *cmd) +{ + return (cmd->buf_fmt >> 4) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + +#endif diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1f539c288ae8..bf85974be862 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3288,6 +3288,11 @@ static void ipr_worker_thread(struct work_struct *work) return; } + if (!ioa_cfg->scan_enabled) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + restart: do { did_work = 0; @@ -10214,6 +10219,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (!ioa_cfg->reset_work_q) { dev_err(&pdev->dev, "Couldn't register reset workqueue\n"); + rc = -ENOMEM; goto out_free_irq; } } else @@ -10362,6 +10368,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; + unsigned long flags; int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -10414,7 +10421,10 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); return 0; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1d42c7464dfc..cdb51960b53c 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1478,6 +1478,7 @@ struct ipr_ioa_cfg { u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; u8 dump_taken:1; + u8 scan_enabled:1; u8 scan_done:1; u8 needs_hard_reset:1; u8 dual_raid:1; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index adf61b43eb70..734a0428ef0e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4854,20 +4854,17 @@ static int lpfc_enable_pci_dev(struct lpfc_hba *phba) { struct pci_dev *pdev; - int bars = 0; /* Obtain PCI device reference */ if (!phba->pcidev) goto out_error; else pdev = phba->pcidev; - /* Select PCI BARs */ - bars = pci_select_bars(pdev, IORESOURCE_MEM); /* Enable PCI device */ if (pci_enable_device_mem(pdev)) goto out_error; /* Request PCI resource for the device */ - if (pci_request_selected_regions(pdev, bars, LPFC_DRIVER_NAME)) + if (pci_request_mem_regions(pdev, LPFC_DRIVER_NAME)) goto out_disable_device; /* Set up device as PCI master and save state for EEH */ pci_set_master(pdev); @@ -4884,7 +4881,7 @@ out_disable_device: pci_disable_device(pdev); out_error: lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1401 Failed to enable pci device, bars:x%x\n", bars); + "1401 Failed to enable pci device\n"); return -ENODEV; } @@ -4899,17 +4896,14 @@ static void lpfc_disable_pci_dev(struct lpfc_hba *phba) { struct pci_dev *pdev; - int bars; /* Obtain PCI device reference */ if (!phba->pcidev) return; else pdev = phba->pcidev; - /* Select PCI BARs */ - bars = pci_select_bars(pdev, IORESOURCE_MEM); /* Release PCI resource and disable PCI device */ - pci_release_selected_regions(pdev, bars); + pci_release_mem_regions(pdev); pci_disable_device(pdev); return; @@ -9811,7 +9805,6 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) struct lpfc_vport **vports; struct lpfc_hba *phba = vport->phba; int i; - int bars = pci_select_bars(pdev, IORESOURCE_MEM); spin_lock_irq(&phba->hbalock); vport->load_flag |= FC_UNLOADING; @@ -9886,7 +9879,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) lpfc_hba_free(phba); - pci_release_selected_regions(pdev, bars); + pci_release_mem_regions(pdev); pci_disable_device(pdev); } diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a5655d571906..d197aa176dee 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3877,7 +3877,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (shost_use_blk_mq(cmnd->device->host)) { + if (cmnd && shost_use_blk_mq(cmnd->device->host)) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 351d08ace24a..7080ce2920fd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1323,21 +1323,18 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { lockdep_assert_held(&phba->hbalock); + BUG_ON(!piocb || !piocb->vport); + list_add_tail(&piocb->list, &pring->txcmplq); piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ; if ((unlikely(pring->ringno == LPFC_ELS_RING)) && (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) && - (!(piocb->vport->load_flag & FC_UNLOADING))) { - if (!piocb->vport) - BUG(); - else - mod_timer(&piocb->vport->els_tmofunc, - jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); - } - + (!(piocb->vport->load_flag & FC_UNLOADING))) + mod_timer(&piocb->vport->els_tmofunc, + jiffies + + msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); return 0; } diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index 180e027b1c8a..796e22037bc4 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -23,7 +23,7 @@ **************************************************/ #if IS_ENABLED(CONFIG_SSB_EMBEDDED) -static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -38,14 +38,14 @@ static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) * ChipCommon **************************************************/ -static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio); } -static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio, +static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -54,7 +54,7 @@ static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio, } static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip, - unsigned gpio) + unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -63,7 +63,7 @@ static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip, } static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) + unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -72,7 +72,7 @@ static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip, return 0; } -static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -85,7 +85,7 @@ static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio) return 0; } -static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio) +static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -256,14 +256,14 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus) #ifdef CONFIG_SSB_DRIVER_EXTIF -static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio); } -static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio, +static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -272,7 +272,7 @@ static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio, } static int ssb_gpio_extif_direction_input(struct gpio_chip *chip, - unsigned gpio) + unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -281,7 +281,7 @@ static int ssb_gpio_extif_direction_input(struct gpio_chip *chip, } static int ssb_gpio_extif_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) + unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); diff --git a/drivers/staging/emxx_udc/Kconfig b/drivers/staging/emxx_udc/Kconfig index cc3402020487..d7577096fb25 100644 --- a/drivers/staging/emxx_udc/Kconfig +++ b/drivers/staging/emxx_udc/Kconfig @@ -1,5 +1,5 @@ config USB_EMXX - bool "EMXX USB Function Device Controller" + tristate "EMXX USB Function Device Controller" depends on USB_GADGET && (ARCH_SHMOBILE || (ARM && COMPILE_TEST)) help The Emma Mobile series of SoCs from Renesas Electronics and diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c index 3bd91758b2da..3b56b2826263 100644 --- a/drivers/staging/emxx_udc/emxx_udc.c +++ b/drivers/staging/emxx_udc/emxx_udc.c @@ -15,7 +15,7 @@ */ #include <linux/kernel.h> -#include <linux/init.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/ioport.h> @@ -39,9 +39,11 @@ #include "emxx_udc.h" +#define DRIVER_DESC "EMXX UDC driver" #define DMA_ADDR_INVALID (~(dma_addr_t)0) static const char driver_name[] = "emxx_udc"; +static const char driver_desc[] = DRIVER_DESC; /*===========================================================================*/ /* Prototype */ @@ -3296,6 +3298,28 @@ static void nbu2ss_drv_shutdown(struct platform_device *pdev) } /*-------------------------------------------------------------------------*/ +static int nbu2ss_drv_remove(struct platform_device *pdev) +{ + struct nbu2ss_udc *udc; + struct nbu2ss_ep *ep; + int i; + + udc = &udc_controller; + + for (i = 0; i < NUM_ENDPOINTS; i++) { + ep = &udc->ep[i]; + if (ep->virt_buf) + dma_free_coherent(NULL, PAGE_SIZE, + (void *)ep->virt_buf, ep->phys_buf); + } + + /* Interrupt Handler - Release */ + free_irq(INT_VBUS, udc); + + return 0; +} + +/*-------------------------------------------------------------------------*/ static int nbu2ss_drv_suspend(struct platform_device *pdev, pm_message_t state) { struct nbu2ss_udc *udc; @@ -3347,12 +3371,16 @@ static int nbu2ss_drv_resume(struct platform_device *pdev) static struct platform_driver udc_driver = { .probe = nbu2ss_drv_probe, .shutdown = nbu2ss_drv_shutdown, + .remove = nbu2ss_drv_remove, .suspend = nbu2ss_drv_suspend, .resume = nbu2ss_drv_resume, .driver = { - .name = driver_name, - .suppress_bind_attrs = true, + .name = driver_name, }, }; -builtin_platform_driver(udc_driver); +module_platform_driver(udc_driver); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("Renesas Electronics Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 581a63a0a63e..463b1a360733 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -78,7 +78,7 @@ static void ll_release(struct dentry *de) * INVALID) so d_lookup() matches it, but we have no lock on it (so * lock_match() fails) and we spin around real_lookup(). */ -static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, +static int ll_dcompare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 8e52722266fe..c1cb6b19e724 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -781,7 +781,7 @@ static int sa_args_init(struct inode *dir, struct inode *child, struct ll_sa_entry *entry, struct md_enqueue_info **pmi, struct ldlm_enqueue_info **pei) { - struct qstr *qstr = &entry->se_qstr; + const struct qstr *qstr = &entry->se_qstr; struct ll_inode_info *lli = ll_i2info(dir); struct md_enqueue_info *minfo; struct ldlm_enqueue_info *einfo; @@ -1340,7 +1340,7 @@ enum { static int is_first_dirent(struct inode *dir, struct dentry *dentry) { struct ll_dir_chain chain; - struct qstr *target = &dentry->d_name; + const struct qstr *target = &dentry->d_name; struct page *page; __u64 pos = 0; int dot_de; diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig index cae42e56f270..7292f23954df 100644 --- a/drivers/staging/media/Kconfig +++ b/drivers/staging/media/Kconfig @@ -16,7 +16,7 @@ menuconfig STAGING_MEDIA If in doubt, say N here. -if STAGING_MEDIA +if STAGING_MEDIA && MEDIA_SUPPORT # Please keep them in alphabetic order source "drivers/staging/media/bcm2048/Kconfig" diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 9fffddb7ac7e..b2393bbacb26 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1252,7 +1252,7 @@ int __cec_s_log_addrs(struct cec_adapter *adap, return -EINVAL; } /* Zero unused part of the feature array */ - memset(features + i, 0, feature_sz - i); + memset(features + i + 1, 0, feature_sz - i - 1); } if (log_addrs->cec_version >= CEC_OP_CEC_VERSION_2_0) { diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 50f3d3a0dd7b..39b928c2849d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -492,7 +492,8 @@ void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD); spin_lock_bh(&conn->cmd_lock); - if (!list_empty(&cmd->i_conn_node)) + if (!list_empty(&cmd->i_conn_node) && + !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP)) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); @@ -4034,6 +4035,7 @@ int iscsi_target_rx_thread(void *arg) static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) { + LIST_HEAD(tmp_list); struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL; struct iscsi_session *sess = conn->sess; /* @@ -4042,18 +4044,26 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) * has been reset -> returned sleeping pre-handler state. */ spin_lock_bh(&conn->cmd_lock); - list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) { + list_splice_init(&conn->conn_cmd_list, &tmp_list); + list_for_each_entry(cmd, &tmp_list, i_conn_node) { + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (se_cmd->se_tfo != NULL) { + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } + spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) { list_del_init(&cmd->i_conn_node); - spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); - iscsit_free_cmd(cmd, true); - spin_lock_bh(&conn->cmd_lock); } - spin_unlock_bh(&conn->cmd_lock); } static void iscsit_stop_timers_for_cmds( diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index b5212f0f9571..adf419fa4291 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1371,8 +1371,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) } login->zero_tsih = zero_tsih; - conn->sess->se_sess->sup_prot_ops = - conn->conn_transport->iscsit_get_sup_prot_ops(conn); + if (conn->sess) + conn->sess->se_sess->sup_prot_ops = + conn->conn_transport->iscsit_get_sup_prot_ops(conn); tpg = conn->tpg; if (!tpg) { diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index a4046ca6e60d..6b423485c5d6 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -821,13 +821,15 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) * in ATA and we need to set TPE=1 */ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, - struct request_queue *q, int block_size) + struct request_queue *q) { + int block_size = queue_logical_block_size(q); + if (!blk_queue_discard(q)) return false; - attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) / - block_size; + attrib->max_unmap_lba_count = + q->limits.max_discard_sectors >> (ilog2(block_size) - 9); /* * Currently hardcoded to 1 in Linux/SCSI code.. */ diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 75f0f08b2a34..d545993df18b 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -161,8 +161,7 @@ static int fd_configure_device(struct se_device *dev) dev_size, div_u64(dev_size, fd_dev->fd_block_size), fd_dev->fd_block_size); - if (target_configure_unmap_from_queue(&dev->dev_attrib, q, - fd_dev->fd_block_size)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) pr_debug("IFILE: BLOCK Discard support available," " disabled by default\n"); /* @@ -523,7 +522,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, */ if (cmd->data_length > FD_MAX_BYTES) { pr_err("FILEIO: Not able to process I/O of %u bytes due to" - "FD_MAX_BYTES: %u iovec count limitiation\n", + "FD_MAX_BYTES: %u iovec count limitation\n", cmd->data_length, FD_MAX_BYTES); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 22af12f8b8eb..47cf6c977367 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -121,8 +121,7 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; - if (target_configure_unmap_from_queue(&dev->dev_attrib, q, - dev->dev_attrib.hw_block_size)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) pr_debug("IBLOCK: BLOCK Discard support available," " disabled by default\n"); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index fc91e85f54ba..e2c970a9d61c 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -146,6 +146,7 @@ sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); bool target_check_wce(struct se_device *dev); bool target_check_fua(struct se_device *dev); +void __target_execute_cmd(struct se_cmd *, bool); /* target_core_stat.c */ void target_stat_setup_dev_default_groups(struct se_device *); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index a9057aa07176..04f616b3ba0a 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -602,7 +602,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, false); kfree(buf); return ret; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 5ab3967dda43..6094a6beddde 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -754,7 +754,15 @@ EXPORT_SYMBOL(target_complete_cmd); void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) { - if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) { + if (scsi_status != SAM_STAT_GOOD) { + return; + } + + /* + * Calculate new residual count based upon length of SCSI data + * transferred. + */ + if (length < cmd->data_length) { if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { cmd->residual_count += cmd->data_length - length; } else { @@ -763,6 +771,12 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len } cmd->data_length = length; + } else if (length > cmd->data_length) { + cmd->se_cmd_flags |= SCF_OVERFLOW_BIT; + cmd->residual_count = length - cmd->data_length; + } else { + cmd->se_cmd_flags &= ~(SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT); + cmd->residual_count = 0; } target_complete_cmd(cmd, scsi_status); @@ -1303,23 +1317,6 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) trace_target_sequencer_start(cmd); - /* - * Check for an existing UNIT ATTENTION condition - */ - ret = target_scsi3_ua_check(cmd); - if (ret) - return ret; - - ret = target_alua_state_check(cmd); - if (ret) - return ret; - - ret = target_check_reservation(cmd); - if (ret) { - cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; - return ret; - } - ret = dev->transport->parse_cdb(cmd); if (ret == TCM_UNSUPPORTED_SCSI_OPCODE) pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n", @@ -1761,20 +1758,45 @@ queue_full: } EXPORT_SYMBOL(transport_generic_request_failure); -void __target_execute_cmd(struct se_cmd *cmd) +void __target_execute_cmd(struct se_cmd *cmd, bool do_checks) { sense_reason_t ret; - if (cmd->execute_cmd) { - ret = cmd->execute_cmd(cmd); - if (ret) { - spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); - spin_unlock_irq(&cmd->t_state_lock); + if (!cmd->execute_cmd) { + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto err; + } + if (do_checks) { + /* + * Check for an existing UNIT ATTENTION condition after + * target_handle_task_attr() has done SAM task attr + * checking, and possibly have already defered execution + * out to target_restart_delayed_cmds() context. + */ + ret = target_scsi3_ua_check(cmd); + if (ret) + goto err; - transport_generic_request_failure(cmd, ret); + ret = target_alua_state_check(cmd); + if (ret) + goto err; + + ret = target_check_reservation(cmd); + if (ret) { + cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; + goto err; } } + + ret = cmd->execute_cmd(cmd); + if (!ret) + return; +err: + spin_lock_irq(&cmd->t_state_lock); + cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + spin_unlock_irq(&cmd->t_state_lock); + + transport_generic_request_failure(cmd, ret); } static int target_write_prot_action(struct se_cmd *cmd) @@ -1819,6 +1841,8 @@ static bool target_handle_task_attr(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; + cmd->se_cmd_flags |= SCF_TASK_ATTR_SET; + /* * Check for the existence of HEAD_OF_QUEUE, and if true return 1 * to allow the passed struct se_cmd list of tasks to the front of the list. @@ -1899,7 +1923,7 @@ void target_execute_cmd(struct se_cmd *cmd) return; } - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, true); } EXPORT_SYMBOL(target_execute_cmd); @@ -1923,7 +1947,7 @@ static void target_restart_delayed_cmds(struct se_device *dev) list_del(&cmd->se_delayed_node); spin_unlock(&dev->delayed_cmd_lock); - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, true); if (cmd->sam_task_attr == TCM_ORDERED_TAG) break; @@ -1941,6 +1965,9 @@ static void transport_complete_task_attr(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; + if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET)) + goto restart; + if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { atomic_dec_mb(&dev->simple_cmds); dev->dev_cur_ordered_id++; @@ -1957,7 +1984,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", dev->dev_cur_ordered_id); } - +restart: target_restart_delayed_cmds(dev); } @@ -2557,15 +2584,10 @@ static void target_release_cmd_kref(struct kref *kref) bool fabric_stop; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - if (list_empty(&se_cmd->se_cmd_list)) { - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_free_cmd_mem(se_cmd); - se_cmd->se_tfo->release_cmd(se_cmd); - return; - } spin_lock(&se_cmd->t_state_lock); - fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && + (se_cmd->transport_state & CMD_T_ABORTED); spin_unlock(&se_cmd->t_state_lock); if (se_cmd->cmd_wait_set || fabric_stop) { diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index f5186a744399..6ffbb603d912 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -91,6 +91,7 @@ static void ft_tport_delete(struct ft_tport *tport) ft_sess_delete_all(tport); lport = tport->lport; + lport->service_params &= ~FCP_SPPF_TARG_FCN; BUG_ON(tport != lport->prov[FC_TYPE_FCP]); RCU_INIT_POINTER(lport->prov[FC_TYPE_FCP], NULL); @@ -110,6 +111,7 @@ void ft_lport_add(struct fc_lport *lport, void *arg) { mutex_lock(&ft_lport_lock); ft_tport_get(lport); + lport->service_params |= FCP_SPPF_TARG_FCN; mutex_unlock(&ft_lport_lock); } diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 1519d2ca7705..73137f4aac20 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -54,7 +54,7 @@ struct ar933x_uart_port { static inline bool ar933x_uart_console_enabled(void) { - return config_enabled(CONFIG_SERIAL_AR933X_CONSOLE); + return IS_ENABLED(CONFIG_SERIAL_AR933X_CONSOLE); } static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up, @@ -636,7 +636,7 @@ static int ar933x_uart_probe(struct platform_device *pdev) int ret; np = pdev->dev.of_node; - if (config_enabled(CONFIG_OF) && np) { + if (IS_ENABLED(CONFIG_OF) && np) { id = of_alias_get_id(np, "serial"); if (id < 0) { dev_err(&pdev->dev, "unable to get alias id, err=%d\n", diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c10972fcc8e4..4fd041bec332 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -387,7 +387,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) * need to have the registers polled during D3, so avoid D3cold. */ if (xhci->quirks & XHCI_COMP_MODE_QUIRK) - pdev->no_d3cold = true; + pci_d3cold_disable(pdev); if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_quirk(hcd); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 96a70789b4c2..4d6a5c672a3d 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -496,12 +496,10 @@ static int cp210x_write_reg_block(struct usb_serial_port *port, u8 req, void *dmabuf; int result; - dmabuf = kmalloc(bufsize, GFP_KERNEL); + dmabuf = kmemdup(buf, bufsize, GFP_KERNEL); if (!dmabuf) return -ENOMEM; - memcpy(dmabuf, buf, bufsize); - result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), req, REQTYPE_HOST_TO_INTERFACE, 0, port_priv->bInterfaceNumber, dmabuf, bufsize, diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ae8c0365abd6..944de657a07a 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -350,6 +350,7 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned long flags; + int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { @@ -360,22 +361,22 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i, urb->actual_length); - switch (urb->status) { + switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; case -EPIPE: dev_err(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status: %d\n", - __func__, urb->status); + __func__, status); goto resubmit; } @@ -399,6 +400,7 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb) { unsigned long flags; struct usb_serial_port *port = urb->context; + int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { @@ -410,22 +412,22 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb) set_bit(i, &port->write_urbs_free); spin_unlock_irqrestore(&port->lock, flags); - switch (urb->status) { + switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; case -EPIPE: dev_err_console(port, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status: %d\n", - __func__, urb->status); + __func__, status); goto resubmit; } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d96d423d00e6..8e07536c233a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 +#define TELIT_PRODUCT_LE910_USBCFG4 0x1206 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -1198,6 +1199,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e7dbbef2af2a..07b4bf01061d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1,5 +1,4 @@ -/* vi: ts=8 sw=8 - * +/* * TI 3410/5052 USB Serial Driver * * Copyright (C) 2004 Texas Instruments @@ -35,9 +34,238 @@ #include <linux/usb.h> #include <linux/usb/serial.h> -#include "ti_usb_3410_5052.h" - -/* Defines */ +/* Configuration ids */ +#define TI_BOOT_CONFIG 1 +#define TI_ACTIVE_CONFIG 2 + +/* Vendor and product ids */ +#define TI_VENDOR_ID 0x0451 +#define IBM_VENDOR_ID 0x04b3 +#define TI_3410_PRODUCT_ID 0x3410 +#define IBM_4543_PRODUCT_ID 0x4543 +#define IBM_454B_PRODUCT_ID 0x454b +#define IBM_454C_PRODUCT_ID 0x454c +#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ +#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ +#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ +#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ +#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ +#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ + +/* Multi-Tech vendor and product ids */ +#define MTS_VENDOR_ID 0x06E0 +#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 +#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 +#define MTS_CDMA_PRODUCT_ID 0xF110 +#define MTS_GSM_PRODUCT_ID 0xF111 +#define MTS_EDGE_PRODUCT_ID 0xF112 +#define MTS_MT9234MU_PRODUCT_ID 0xF114 +#define MTS_MT9234ZBA_PRODUCT_ID 0xF115 +#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 + +/* Abbott Diabetics vendor and product ids */ +#define ABBOTT_VENDOR_ID 0x1a61 +#define ABBOTT_STEREO_PLUG_ID 0x3410 +#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID +#define ABBOTT_STRIP_PORT_ID 0x3420 + +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + +/* Moxa UPORT 11x0 vendor and product IDs */ +#define MXU1_VENDOR_ID 0x110a +#define MXU1_1110_PRODUCT_ID 0x1110 +#define MXU1_1130_PRODUCT_ID 0x1130 +#define MXU1_1150_PRODUCT_ID 0x1150 +#define MXU1_1151_PRODUCT_ID 0x1151 +#define MXU1_1131_PRODUCT_ID 0x1131 + +/* Commands */ +#define TI_GET_VERSION 0x01 +#define TI_GET_PORT_STATUS 0x02 +#define TI_GET_PORT_DEV_INFO 0x03 +#define TI_GET_CONFIG 0x04 +#define TI_SET_CONFIG 0x05 +#define TI_OPEN_PORT 0x06 +#define TI_CLOSE_PORT 0x07 +#define TI_START_PORT 0x08 +#define TI_STOP_PORT 0x09 +#define TI_TEST_PORT 0x0A +#define TI_PURGE_PORT 0x0B +#define TI_RESET_EXT_DEVICE 0x0C +#define TI_WRITE_DATA 0x80 +#define TI_READ_DATA 0x81 +#define TI_REQ_TYPE_CLASS 0x82 + +/* Module identifiers */ +#define TI_I2C_PORT 0x01 +#define TI_IEEE1284_PORT 0x02 +#define TI_UART1_PORT 0x03 +#define TI_UART2_PORT 0x04 +#define TI_RAM_PORT 0x05 + +/* Modem status */ +#define TI_MSR_DELTA_CTS 0x01 +#define TI_MSR_DELTA_DSR 0x02 +#define TI_MSR_DELTA_RI 0x04 +#define TI_MSR_DELTA_CD 0x08 +#define TI_MSR_CTS 0x10 +#define TI_MSR_DSR 0x20 +#define TI_MSR_RI 0x40 +#define TI_MSR_CD 0x80 +#define TI_MSR_DELTA_MASK 0x0F +#define TI_MSR_MASK 0xF0 + +/* Line status */ +#define TI_LSR_OVERRUN_ERROR 0x01 +#define TI_LSR_PARITY_ERROR 0x02 +#define TI_LSR_FRAMING_ERROR 0x04 +#define TI_LSR_BREAK 0x08 +#define TI_LSR_ERROR 0x0F +#define TI_LSR_RX_FULL 0x10 +#define TI_LSR_TX_EMPTY 0x20 + +/* Line control */ +#define TI_LCR_BREAK 0x40 + +/* Modem control */ +#define TI_MCR_LOOP 0x04 +#define TI_MCR_DTR 0x10 +#define TI_MCR_RTS 0x20 + +/* Mask settings */ +#define TI_UART_ENABLE_RTS_IN 0x0001 +#define TI_UART_DISABLE_RTS 0x0002 +#define TI_UART_ENABLE_PARITY_CHECKING 0x0008 +#define TI_UART_ENABLE_DSR_OUT 0x0010 +#define TI_UART_ENABLE_CTS_OUT 0x0020 +#define TI_UART_ENABLE_X_OUT 0x0040 +#define TI_UART_ENABLE_XA_OUT 0x0080 +#define TI_UART_ENABLE_X_IN 0x0100 +#define TI_UART_ENABLE_DTR_IN 0x0800 +#define TI_UART_DISABLE_DTR 0x1000 +#define TI_UART_ENABLE_MS_INTS 0x2000 +#define TI_UART_ENABLE_AUTO_START_DMA 0x4000 + +/* Parity */ +#define TI_UART_NO_PARITY 0x00 +#define TI_UART_ODD_PARITY 0x01 +#define TI_UART_EVEN_PARITY 0x02 +#define TI_UART_MARK_PARITY 0x03 +#define TI_UART_SPACE_PARITY 0x04 + +/* Stop bits */ +#define TI_UART_1_STOP_BITS 0x00 +#define TI_UART_1_5_STOP_BITS 0x01 +#define TI_UART_2_STOP_BITS 0x02 + +/* Bits per character */ +#define TI_UART_5_DATA_BITS 0x00 +#define TI_UART_6_DATA_BITS 0x01 +#define TI_UART_7_DATA_BITS 0x02 +#define TI_UART_8_DATA_BITS 0x03 + +/* 232/485 modes */ +#define TI_UART_232 0x00 +#define TI_UART_485_RECEIVER_DISABLED 0x01 +#define TI_UART_485_RECEIVER_ENABLED 0x02 + +/* Pipe transfer mode and timeout */ +#define TI_PIPE_MODE_CONTINUOUS 0x01 +#define TI_PIPE_MODE_MASK 0x03 +#define TI_PIPE_TIMEOUT_MASK 0x7C +#define TI_PIPE_TIMEOUT_ENABLE 0x80 + +/* Config struct */ +struct ti_uart_config { + __u16 wBaudRate; + __u16 wFlags; + __u8 bDataBits; + __u8 bParity; + __u8 bStopBits; + char cXon; + char cXoff; + __u8 bUartMode; +} __packed; + +/* Get port status */ +struct ti_port_status { + __u8 bCmdCode; + __u8 bModuleId; + __u8 bErrorCode; + __u8 bMSR; + __u8 bLSR; +} __packed; + +/* Purge modes */ +#define TI_PURGE_OUTPUT 0x00 +#define TI_PURGE_INPUT 0x80 + +/* Read/Write data */ +#define TI_RW_DATA_ADDR_SFR 0x10 +#define TI_RW_DATA_ADDR_IDATA 0x20 +#define TI_RW_DATA_ADDR_XDATA 0x30 +#define TI_RW_DATA_ADDR_CODE 0x40 +#define TI_RW_DATA_ADDR_GPIO 0x50 +#define TI_RW_DATA_ADDR_I2C 0x60 +#define TI_RW_DATA_ADDR_FLASH 0x70 +#define TI_RW_DATA_ADDR_DSP 0x80 + +#define TI_RW_DATA_UNSPECIFIED 0x00 +#define TI_RW_DATA_BYTE 0x01 +#define TI_RW_DATA_WORD 0x02 +#define TI_RW_DATA_DOUBLE_WORD 0x04 + +struct ti_write_data_bytes { + __u8 bAddrType; + __u8 bDataType; + __u8 bDataCounter; + __be16 wBaseAddrHi; + __be16 wBaseAddrLo; + __u8 bData[0]; +} __packed; + +struct ti_read_data_request { + __u8 bAddrType; + __u8 bDataType; + __u8 bDataCounter; + __be16 wBaseAddrHi; + __be16 wBaseAddrLo; +} __packed; + +struct ti_read_data_bytes { + __u8 bCmdCode; + __u8 bModuleId; + __u8 bErrorCode; + __u8 bData[0]; +} __packed; + +/* Interrupt struct */ +struct ti_interrupt { + __u8 bICode; + __u8 bIInfo; +} __packed; + +/* Interrupt codes */ +#define TI_CODE_HARDWARE_ERROR 0xFF +#define TI_CODE_DATA_ERROR 0x03 +#define TI_CODE_MODEM_STATUS 0x04 + +/* Download firmware max packet size */ +#define TI_DOWNLOAD_MAX_PACKET_SIZE 64 + +/* Firmware image header */ +struct ti_firmware_header { + __le16 wLength; + __u8 bCheckSum; +} __packed; + +/* UART addresses */ +#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ +#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ +#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ +#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ #define TI_DRIVER_AUTHOR "Al Borchers <alborchers@steinerpoint.com>" #define TI_DRIVER_DESC "TI USB 3410/5052 Serial Driver" @@ -58,9 +286,6 @@ #define TI_EXTRA_VID_PID_COUNT 5 - -/* Structures */ - struct ti_port { int tp_is_open; __u8 tp_msr; @@ -84,9 +309,6 @@ struct ti_device { int td_urb_error; }; - -/* Function Declarations */ - static int ti_startup(struct usb_serial *serial); static void ti_release(struct usb_serial *serial); static int ti_port_probe(struct usb_serial_port *port); @@ -136,13 +358,8 @@ static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, static int ti_download_firmware(struct ti_device *tdev); - -/* Data */ - -/* module parameters */ static int closing_wait = TI_DEFAULT_CLOSING_WAIT; -/* supported devices */ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, @@ -174,7 +391,7 @@ static const struct usb_device_id ti_id_table_5052[] = { { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, - { } /* terminator */ + { } }; static const struct usb_device_id ti_id_table_combined[] = { @@ -275,8 +492,6 @@ static struct usb_serial_driver * const serial_drivers[] = { &ti_1port_device, &ti_2port_device, NULL }; -/* Module */ - MODULE_AUTHOR(TI_DRIVER_AUTHOR); MODULE_DESCRIPTION(TI_DRIVER_DESC); MODULE_LICENSE("GPL"); @@ -302,8 +517,6 @@ MODULE_DEVICE_TABLE(usb, ti_id_table_combined); module_usb_serial_driver(serial_drivers, ti_id_table_combined); -/* Functions */ - static int ti_startup(struct usb_serial *serial) { struct ti_device *tdev; @@ -319,7 +532,6 @@ static int ti_startup(struct usb_serial *serial) dev->descriptor.bNumConfigurations, dev->actconfig->desc.bConfigurationValue); - /* create device structure */ tdev = kzalloc(sizeof(struct ti_device), GFP_KERNEL); if (!tdev) return -ENOMEM; @@ -435,7 +647,7 @@ static int ti_open(struct tty_struct *tty, struct usb_serial_port *port) struct urb *urb; int port_number; int status; - __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINOUS | + __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINUOUS | TI_PIPE_TIMEOUT_ENABLE | (TI_TRANSFER_TIMEOUT << 2)); @@ -954,6 +1166,15 @@ static void ti_break(struct tty_struct *tty, int break_state) dev_dbg(&port->dev, "%s - error setting break, %d\n", __func__, status); } +static int ti_get_port_from_code(unsigned char code) +{ + return (code >> 4) - 3; +} + +static int ti_get_func_from_code(unsigned char code) +{ + return code & 0x0f; +} static void ti_interrupt_callback(struct urb *urb) { @@ -995,8 +1216,8 @@ static void ti_interrupt_callback(struct urb *urb) goto exit; } - port_number = TI_GET_PORT_FROM_CODE(data[0]); - function = TI_GET_FUNC_FROM_CODE(data[0]); + port_number = ti_get_port_from_code(data[0]); + function = ti_get_func_from_code(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, data 0x%02X\n", __func__, port_number, function, data[1]); diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h deleted file mode 100644 index bbfd3a184600..000000000000 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ /dev/null @@ -1,259 +0,0 @@ -/* vi: ts=8 sw=8 - * - * TI 3410/5052 USB Serial Driver Header - * - * Copyright (C) 2004 Texas Instruments - * - * This driver is based on the Linux io_ti driver, which is - * Copyright (C) 2000-2002 Inside Out Networks - * Copyright (C) 2001-2002 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * For questions or problems with this driver, contact Texas Instruments - * technical support, or Al Borchers <alborchers@steinerpoint.com>, or - * Peter Berger <pberger@brimson.com>. - */ - -#ifndef _TI_3410_5052_H_ -#define _TI_3410_5052_H_ - -/* Configuration ids */ -#define TI_BOOT_CONFIG 1 -#define TI_ACTIVE_CONFIG 2 - -/* Vendor and product ids */ -#define TI_VENDOR_ID 0x0451 -#define IBM_VENDOR_ID 0x04b3 -#define TI_3410_PRODUCT_ID 0x3410 -#define IBM_4543_PRODUCT_ID 0x4543 -#define IBM_454B_PRODUCT_ID 0x454b -#define IBM_454C_PRODUCT_ID 0x454c -#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ -#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ -#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ -#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ -#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ -#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ - -/* Multi-Tech vendor and product ids */ -#define MTS_VENDOR_ID 0x06E0 -#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 -#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 -#define MTS_CDMA_PRODUCT_ID 0xF110 -#define MTS_GSM_PRODUCT_ID 0xF111 -#define MTS_EDGE_PRODUCT_ID 0xF112 -#define MTS_MT9234MU_PRODUCT_ID 0xF114 -#define MTS_MT9234ZBA_PRODUCT_ID 0xF115 -#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 - -/* Abbott Diabetics vendor and product ids */ -#define ABBOTT_VENDOR_ID 0x1a61 -#define ABBOTT_STEREO_PLUG_ID 0x3410 -#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID -#define ABBOTT_STRIP_PORT_ID 0x3420 - -/* Honeywell vendor and product IDs */ -#define HONEYWELL_VENDOR_ID 0x10ac -#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ - -/* Moxa UPORT 11x0 vendor and product IDs */ -#define MXU1_VENDOR_ID 0x110a -#define MXU1_1110_PRODUCT_ID 0x1110 -#define MXU1_1130_PRODUCT_ID 0x1130 -#define MXU1_1131_PRODUCT_ID 0x1131 -#define MXU1_1150_PRODUCT_ID 0x1150 -#define MXU1_1151_PRODUCT_ID 0x1151 - -/* Commands */ -#define TI_GET_VERSION 0x01 -#define TI_GET_PORT_STATUS 0x02 -#define TI_GET_PORT_DEV_INFO 0x03 -#define TI_GET_CONFIG 0x04 -#define TI_SET_CONFIG 0x05 -#define TI_OPEN_PORT 0x06 -#define TI_CLOSE_PORT 0x07 -#define TI_START_PORT 0x08 -#define TI_STOP_PORT 0x09 -#define TI_TEST_PORT 0x0A -#define TI_PURGE_PORT 0x0B -#define TI_RESET_EXT_DEVICE 0x0C -#define TI_WRITE_DATA 0x80 -#define TI_READ_DATA 0x81 -#define TI_REQ_TYPE_CLASS 0x82 - -/* Module identifiers */ -#define TI_I2C_PORT 0x01 -#define TI_IEEE1284_PORT 0x02 -#define TI_UART1_PORT 0x03 -#define TI_UART2_PORT 0x04 -#define TI_RAM_PORT 0x05 - -/* Modem status */ -#define TI_MSR_DELTA_CTS 0x01 -#define TI_MSR_DELTA_DSR 0x02 -#define TI_MSR_DELTA_RI 0x04 -#define TI_MSR_DELTA_CD 0x08 -#define TI_MSR_CTS 0x10 -#define TI_MSR_DSR 0x20 -#define TI_MSR_RI 0x40 -#define TI_MSR_CD 0x80 -#define TI_MSR_DELTA_MASK 0x0F -#define TI_MSR_MASK 0xF0 - -/* Line status */ -#define TI_LSR_OVERRUN_ERROR 0x01 -#define TI_LSR_PARITY_ERROR 0x02 -#define TI_LSR_FRAMING_ERROR 0x04 -#define TI_LSR_BREAK 0x08 -#define TI_LSR_ERROR 0x0F -#define TI_LSR_RX_FULL 0x10 -#define TI_LSR_TX_EMPTY 0x20 - -/* Line control */ -#define TI_LCR_BREAK 0x40 - -/* Modem control */ -#define TI_MCR_LOOP 0x04 -#define TI_MCR_DTR 0x10 -#define TI_MCR_RTS 0x20 - -/* Mask settings */ -#define TI_UART_ENABLE_RTS_IN 0x0001 -#define TI_UART_DISABLE_RTS 0x0002 -#define TI_UART_ENABLE_PARITY_CHECKING 0x0008 -#define TI_UART_ENABLE_DSR_OUT 0x0010 -#define TI_UART_ENABLE_CTS_OUT 0x0020 -#define TI_UART_ENABLE_X_OUT 0x0040 -#define TI_UART_ENABLE_XA_OUT 0x0080 -#define TI_UART_ENABLE_X_IN 0x0100 -#define TI_UART_ENABLE_DTR_IN 0x0800 -#define TI_UART_DISABLE_DTR 0x1000 -#define TI_UART_ENABLE_MS_INTS 0x2000 -#define TI_UART_ENABLE_AUTO_START_DMA 0x4000 - -/* Parity */ -#define TI_UART_NO_PARITY 0x00 -#define TI_UART_ODD_PARITY 0x01 -#define TI_UART_EVEN_PARITY 0x02 -#define TI_UART_MARK_PARITY 0x03 -#define TI_UART_SPACE_PARITY 0x04 - -/* Stop bits */ -#define TI_UART_1_STOP_BITS 0x00 -#define TI_UART_1_5_STOP_BITS 0x01 -#define TI_UART_2_STOP_BITS 0x02 - -/* Bits per character */ -#define TI_UART_5_DATA_BITS 0x00 -#define TI_UART_6_DATA_BITS 0x01 -#define TI_UART_7_DATA_BITS 0x02 -#define TI_UART_8_DATA_BITS 0x03 - -/* 232/485 modes */ -#define TI_UART_232 0x00 -#define TI_UART_485_RECEIVER_DISABLED 0x01 -#define TI_UART_485_RECEIVER_ENABLED 0x02 - -/* Pipe transfer mode and timeout */ -#define TI_PIPE_MODE_CONTINOUS 0x01 -#define TI_PIPE_MODE_MASK 0x03 -#define TI_PIPE_TIMEOUT_MASK 0x7C -#define TI_PIPE_TIMEOUT_ENABLE 0x80 - -/* Config struct */ -struct ti_uart_config { - __u16 wBaudRate; - __u16 wFlags; - __u8 bDataBits; - __u8 bParity; - __u8 bStopBits; - char cXon; - char cXoff; - __u8 bUartMode; -} __attribute__((packed)); - -/* Get port status */ -struct ti_port_status { - __u8 bCmdCode; - __u8 bModuleId; - __u8 bErrorCode; - __u8 bMSR; - __u8 bLSR; -} __attribute__((packed)); - -/* Purge modes */ -#define TI_PURGE_OUTPUT 0x00 -#define TI_PURGE_INPUT 0x80 - -/* Read/Write data */ -#define TI_RW_DATA_ADDR_SFR 0x10 -#define TI_RW_DATA_ADDR_IDATA 0x20 -#define TI_RW_DATA_ADDR_XDATA 0x30 -#define TI_RW_DATA_ADDR_CODE 0x40 -#define TI_RW_DATA_ADDR_GPIO 0x50 -#define TI_RW_DATA_ADDR_I2C 0x60 -#define TI_RW_DATA_ADDR_FLASH 0x70 -#define TI_RW_DATA_ADDR_DSP 0x80 - -#define TI_RW_DATA_UNSPECIFIED 0x00 -#define TI_RW_DATA_BYTE 0x01 -#define TI_RW_DATA_WORD 0x02 -#define TI_RW_DATA_DOUBLE_WORD 0x04 - -struct ti_write_data_bytes { - __u8 bAddrType; - __u8 bDataType; - __u8 bDataCounter; - __be16 wBaseAddrHi; - __be16 wBaseAddrLo; - __u8 bData[0]; -} __attribute__((packed)); - -struct ti_read_data_request { - __u8 bAddrType; - __u8 bDataType; - __u8 bDataCounter; - __be16 wBaseAddrHi; - __be16 wBaseAddrLo; -} __attribute__((packed)); - -struct ti_read_data_bytes { - __u8 bCmdCode; - __u8 bModuleId; - __u8 bErrorCode; - __u8 bData[0]; -} __attribute__((packed)); - -/* Interrupt struct */ -struct ti_interrupt { - __u8 bICode; - __u8 bIInfo; -} __attribute__((packed)); - -/* Interrupt codes */ -#define TI_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3) -#define TI_GET_FUNC_FROM_CODE(c) ((c) & 0x0f) -#define TI_CODE_HARDWARE_ERROR 0xFF -#define TI_CODE_DATA_ERROR 0x03 -#define TI_CODE_MODEM_STATUS 0x04 - -/* Download firmware max packet size */ -#define TI_DOWNLOAD_MAX_PACKET_SIZE 64 - -/* Firmware image header */ -struct ti_firmware_header { - __le16 wLength; - __u8 bCheckSum; -} __attribute__((packed)); - -/* UART addresses */ -#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ -#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ -#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ -#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ - -#endif /* _TI_3410_5052_H_ */ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 533eaf04f12f..40764ecad9ce 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -2,7 +2,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) select VHOST - select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate guest networking with virtio_net. Not to be confused with virtio_net @@ -15,17 +14,24 @@ config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD && m select VHOST - select VHOST_RING default n ---help--- Say M here to enable the vhost_scsi TCM fabric module for use with virtio-scsi guests -config VHOST_RING - tristate +config VHOST_VSOCK + tristate "vhost virtio-vsock driver" + depends on VSOCKETS && EVENTFD + select VIRTIO_VSOCKETS_COMMON + select VHOST + default n ---help--- - This option is selected by any driver which needs to access - the host side of a virtio ring. + This kernel module can be loaded in the host kernel to provide AF_VSOCK + sockets for communicating with guests. The guests must have the + virtio_transport.ko driver loaded to use the virtio-vsock device. + + To compile this driver as a module, choose M here: the module will be called + vhost_vsock. config VHOST tristate diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh new file mode 100644 index 000000000000..6a4490c09d7f --- /dev/null +++ b/drivers/vhost/Kconfig.vringh @@ -0,0 +1,5 @@ +config VHOST_RING + tristate + ---help--- + This option is selected by any driver which needs to access + the host side of a virtio ring. diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index e0441c34db1c..6b012b986b57 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,5 +4,9 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o +obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o +vhost_vsock-y := vsock.o + obj-$(CONFIG_VHOST_RING) += vringh.o + obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e032ca397371..5dc128a8da83 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_IOMMU_PLATFORM) }; enum { @@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, { unsigned long uninitialized_var(endtime); int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); + out_num, in_num, NULL, NULL); if (r == vq->num && vq->busyloop_timeout) { preempt_disable(); @@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, cpu_relax_lowlatency(); preempt_enable(); r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); + out_num, in_num, NULL, NULL); } return r; @@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) goto out; + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net) sock = vq->private_data; if (!sock) goto out; + + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); @@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); @@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&n->dev)) { - mutex_unlock(&n->dev.mutex); - return -EFAULT; + !vhost_log_access_ok(&n->dev)) + goto out_unlock; + + if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if (vhost_init_device_iotlb(&n->dev, true)) + goto out_unlock; } + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; @@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_unlock(&n->dev.mutex); return 0; + +out_unlock: + mutex_unlock(&n->dev.mutex); + return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) @@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, } #endif +static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_write_iter(dev, from); +} + +static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, + .read_iter = vhost_net_chr_read_iter, + .write_iter = vhost_net_chr_write_iter, + .poll = vhost_net_chr_poll, .unlocked_ioctl = vhost_net_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vhost_net_compat_ioctl, diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 669fef1e2bb6..c6f2d89c0e97 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/interval_tree_generic.h> #include "vhost.h" @@ -34,6 +35,10 @@ static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. (default: 2048)"); enum { VHOST_MEMORY_F_LOG = 0x1, @@ -42,6 +47,10 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +INTERVAL_TREE_DEFINE(struct vhost_umem_node, + rb, __u64, __subtree_last, + START, LAST, , vhost_umem_interval_tree); + #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { @@ -131,6 +140,19 @@ static void vhost_reset_is_le(struct vhost_virtqueue *vq) vq->is_le = virtio_legacy_is_little_endian(); } +struct vhost_flush_struct { + struct vhost_work work; + struct completion wait_event; +}; + +static void vhost_flush_work(struct vhost_work *work) +{ + struct vhost_flush_struct *s; + + s = container_of(work, struct vhost_flush_struct, work); + complete(&s->wait_event); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -155,11 +177,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { - INIT_LIST_HEAD(&work->node); + clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; } EXPORT_SYMBOL_GPL(vhost_work_init); @@ -211,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, - unsigned seq) -{ - int left; - - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq; - spin_unlock_irq(&dev->work_lock); - return left <= 0; -} - void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { - unsigned seq; - int flushing; + struct vhost_flush_struct flush; - spin_lock_irq(&dev->work_lock); - seq = work->queue_seq; - work->flushing++; - spin_unlock_irq(&dev->work_lock); - wait_event(work->done, vhost_work_seq_done(dev, work, seq)); - spin_lock_irq(&dev->work_lock); - flushing = --work->flushing; - spin_unlock_irq(&dev->work_lock); - BUG_ON(flushing < 0); + if (dev->worker) { + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); + + vhost_work_queue(dev, &flush.work); + wait_for_completion(&flush.wait_event); + } } EXPORT_SYMBOL_GPL(vhost_work_flush); @@ -249,16 +255,16 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - unsigned long flags; + if (!dev->worker) + return; - spin_lock_irqsave(&dev->work_lock, flags); - if (list_empty(&work->node)) { - list_add_tail(&work->node, &dev->work_list); - work->queue_seq++; - spin_unlock_irqrestore(&dev->work_lock, flags); + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { + /* We can only add the work to the list after we're + * sure it was not in the list. + */ + smp_mb(); + llist_add(&work->node, &dev->work_list); wake_up_process(dev->worker); - } else { - spin_unlock_irqrestore(&dev->work_lock, flags); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -266,7 +272,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue); /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { - return !list_empty(&dev->work_list); + return !llist_empty(&dev->work_list); } EXPORT_SYMBOL_GPL(vhost_has_work); @@ -300,17 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->memory = NULL; vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); vq->busyloop_timeout = 0; + vq->umem = NULL; + vq->iotlb = NULL; } static int vhost_worker(void *data) { struct vhost_dev *dev = data; - struct vhost_work *work = NULL; - unsigned uninitialized_var(seq); + struct vhost_work *work, *work_next; + struct llist_node *node; mm_segment_t oldfs = get_fs(); set_fs(USER_DS); @@ -320,35 +327,25 @@ static int vhost_worker(void *data) /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&dev->work_lock); - if (work) { - work->done_seq = seq; - if (work->flushing) - wake_up_all(&work->done); - } - if (kthread_should_stop()) { - spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); break; } - if (!list_empty(&dev->work_list)) { - work = list_first_entry(&dev->work_list, - struct vhost_work, node); - list_del_init(&work->node); - seq = work->queue_seq; - } else - work = NULL; - spin_unlock_irq(&dev->work_lock); - if (work) { + node = llist_del_all(&dev->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); work->fn(work); if (need_resched()) schedule(); - } else - schedule(); - + } } unuse_mm(dev->mm); set_fs(oldfs); @@ -407,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev, mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->log_file = NULL; - dev->memory = NULL; + dev->umem = NULL; + dev->iotlb = NULL; dev->mm = NULL; - spin_lock_init(&dev->work_lock); - INIT_LIST_HEAD(&dev->work_list); dev->worker = NULL; + init_llist_head(&dev->work_list); + init_waitqueue_head(&dev->wait); + INIT_LIST_HEAD(&dev->read_list); + INIT_LIST_HEAD(&dev->pending_list); + spin_lock_init(&dev->iotlb_lock); + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -512,27 +514,36 @@ err_mm: } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); -struct vhost_memory *vhost_dev_reset_owner_prepare(void) +static void *vhost_kvzalloc(unsigned long size) { - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + + if (!n) + n = vzalloc(size); + return n; +} + +struct vhost_umem *vhost_dev_reset_owner_prepare(void) +{ + return vhost_kvzalloc(sizeof(struct vhost_umem)); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) { int i; vhost_dev_cleanup(dev, true); /* Restore memory to default empty mapping. */ - memory->nregions = 0; - dev->memory = memory; + INIT_LIST_HEAD(&umem->umem_list); + dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) - dev->vqs[i]->memory = memory; + dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); @@ -549,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_stop); +static void vhost_umem_free(struct vhost_umem *umem, + struct vhost_umem_node *node) +{ + vhost_umem_interval_tree_remove(node, &umem->umem_tree); + list_del(&node->link); + kfree(node); + umem->numem--; +} + +static void vhost_umem_clean(struct vhost_umem *umem) +{ + struct vhost_umem_node *node, *tmp; + + if (!umem) + return; + + list_for_each_entry_safe(node, tmp, &umem->umem_list, link) + vhost_umem_free(umem, node); + + kvfree(umem); +} + +static void vhost_clear_msg(struct vhost_dev *dev) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&dev->iotlb_lock); + + list_for_each_entry_safe(node, n, &dev->read_list, node) { + list_del(&node->node); + kfree(node); + } + + list_for_each_entry_safe(node, n, &dev->pending_list, node) { + list_del(&node->node); + kfree(node); + } + + spin_unlock(&dev->iotlb_lock); +} + /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) { @@ -575,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kvfree(dev->memory); - dev->memory = NULL; - WARN_ON(!list_empty(&dev->work_list)); + vhost_umem_clean(dev->umem); + dev->umem = NULL; + vhost_umem_clean(dev->iotlb); + dev->iotlb = NULL; + vhost_clear_msg(dev); + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); + WARN_ON(!llist_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; @@ -601,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +static bool vhost_overflow(u64 uaddr, u64 size) +{ + /* Make sure 64 bit math will not overflow. */ + return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; +} + /* Caller should have vq mutex and device mutex. */ -static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, +static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, int log_all) { - int i; + struct vhost_umem_node *node; - if (!mem) + if (!umem) return 0; - for (i = 0; i < mem->nregions; ++i) { - struct vhost_memory_region *m = mem->regions + i; - unsigned long a = m->userspace_addr; - if (m->memory_size > ULONG_MAX) + list_for_each_entry(node, &umem->umem_list, link) { + unsigned long a = node->userspace_addr; + + if (vhost_overflow(node->userspace_addr, node->size)) return 0; - else if (!access_ok(VERIFY_WRITE, (void __user *)a, - m->memory_size)) + + + if (!access_ok(VERIFY_WRITE, (void __user *)a, + node->size)) return 0; else if (log_all && !log_access_ok(log_base, - m->guest_phys_addr, - m->memory_size)) + node->start, + node->size)) return 0; } return 1; @@ -628,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ -static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, +static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, int log_all) { int i; @@ -641,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) - ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log); + ok = vq_memory_access_ok(d->vqs[i]->log_base, + umem, log); else ok = 1; mutex_unlock(&d->vqs[i]->mutex); @@ -651,12 +716,385 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } +static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct iovec iov[], int iov_size, int access); + +static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to, + const void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_to_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that all vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter t; + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_WO); + if (ret < 0) + goto out; + iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size); + ret = copy_to_iter(from, size, &t); + if (ret == size) + ret = 0; + } +out: + return ret; +} + +static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, + void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_from_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter f; + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", from, + (unsigned long long) size); + goto out; + } + iov_iter_init(&f, READ, vq->iotlb_iov, ret, size); + ret = copy_from_iter(to, size, &f); + if (ret == size) + ret = 0; + } + +out: + return ret; +} + +static void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned size) +{ + int ret; + + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { + vq_err(vq, "Non atomic userspace memory access: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + return vq->iotlb_iov[0].iov_base; +} + +#define vhost_put_user(vq, x, ptr) \ +({ \ + int ret = -EFAULT; \ + if (!vq->iotlb) { \ + ret = __put_user(x, ptr); \ + } else { \ + __typeof__(ptr) to = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (to != NULL) \ + ret = __put_user(x, to); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +#define vhost_get_user(vq, x, ptr) \ +({ \ + int ret; \ + if (!vq->iotlb) { \ + ret = __get_user(x, ptr); \ + } else { \ + __typeof__(ptr) from = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (from != NULL) \ + ret = __get_user(x, from); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +static void vhost_dev_lock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_lock(&d->vqs[i]->mutex); +} + +static void vhost_dev_unlock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_unlock(&d->vqs[i]->mutex); +} + +static int vhost_new_umem_range(struct vhost_umem *umem, + u64 start, u64 size, u64 end, + u64 userspace_addr, int perm) +{ + struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + + if (!node) + return -ENOMEM; + + if (umem->numem == max_iotlb_entries) { + tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); + vhost_umem_free(umem, tmp); + } + + node->start = start; + node->size = size; + node->last = end; + node->userspace_addr = userspace_addr; + node->perm = perm; + INIT_LIST_HEAD(&node->link); + list_add_tail(&node->link, &umem->umem_list); + vhost_umem_interval_tree_insert(node, &umem->umem_tree); + umem->numem++; + + return 0; +} + +static void vhost_del_umem_range(struct vhost_umem *umem, + u64 start, u64 end) +{ + struct vhost_umem_node *node; + + while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + start, end))) + vhost_umem_free(umem, node); +} + +static void vhost_iotlb_notify_vq(struct vhost_dev *d, + struct vhost_iotlb_msg *msg) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&d->iotlb_lock); + + list_for_each_entry_safe(node, n, &d->pending_list, node) { + struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; + if (msg->iova <= vq_msg->iova && + msg->iova + msg->size - 1 > vq_msg->iova && + vq_msg->type == VHOST_IOTLB_MISS) { + vhost_poll_queue(&node->vq->poll); + list_del(&node->node); + kfree(node); + } + } + + spin_unlock(&d->iotlb_lock); +} + +static int umem_access_ok(u64 uaddr, u64 size, int access) +{ + unsigned long a = uaddr; + + /* Make sure 64 bit math will not overflow. */ + if (vhost_overflow(uaddr, size)) + return -EFAULT; + + if ((access & VHOST_ACCESS_RO) && + !access_ok(VERIFY_READ, (void __user *)a, size)) + return -EFAULT; + if ((access & VHOST_ACCESS_WO) && + !access_ok(VERIFY_WRITE, (void __user *)a, size)) + return -EFAULT; + return 0; +} + +int vhost_process_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *msg) +{ + int ret = 0; + + vhost_dev_lock_vqs(dev); + switch (msg->type) { + case VHOST_IOTLB_UPDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } + if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) { + ret = -EFAULT; + break; + } + if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, + msg->iova + msg->size - 1, + msg->uaddr, msg->perm)) { + ret = -ENOMEM; + break; + } + vhost_iotlb_notify_vq(dev, msg); + break; + case VHOST_IOTLB_INVALIDATE: + vhost_del_umem_range(dev->iotlb, msg->iova, + msg->iova + msg->size - 1); + break; + default: + ret = -EINVAL; + break; + } + + vhost_dev_unlock_vqs(dev); + return ret; +} +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from) +{ + struct vhost_msg_node node; + unsigned size = sizeof(struct vhost_msg); + size_t ret; + int err; + + if (iov_iter_count(from) < size) + return 0; + ret = copy_from_iter(&node.msg, size, from); + if (ret != size) + goto done; + + switch (node.msg.type) { + case VHOST_IOTLB_MSG: + err = vhost_process_iotlb_msg(dev, &node.msg.iotlb); + if (err) + ret = err; + break; + default: + ret = -EINVAL; + break; + } + +done: + return ret; +} +EXPORT_SYMBOL(vhost_chr_write_iter); + +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &dev->wait, wait); + + if (!list_empty(&dev->read_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} +EXPORT_SYMBOL(vhost_chr_poll); + +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock) +{ + DEFINE_WAIT(wait); + struct vhost_msg_node *node; + ssize_t ret = 0; + unsigned size = sizeof(struct vhost_msg); + + if (iov_iter_count(to) < size) + return 0; + + while (1) { + if (!noblock) + prepare_to_wait(&dev->wait, &wait, + TASK_INTERRUPTIBLE); + + node = vhost_dequeue_msg(dev, &dev->read_list); + if (node) + break; + if (noblock) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!dev->iotlb) { + ret = -EBADFD; + break; + } + + schedule(); + } + + if (!noblock) + finish_wait(&dev->wait, &wait); + + if (node) { + ret = copy_to_iter(&node->msg, size, to); + + if (ret != size || node->msg.type != VHOST_IOTLB_MISS) { + kfree(node); + return ret; + } + + vhost_enqueue_msg(dev, &dev->pending_list, node); + } + + return ret; +} +EXPORT_SYMBOL_GPL(vhost_chr_read_iter); + +static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) +{ + struct vhost_dev *dev = vq->dev; + struct vhost_msg_node *node; + struct vhost_iotlb_msg *msg; + + node = vhost_new_msg(vq, VHOST_IOTLB_MISS); + if (!node) + return -ENOMEM; + + msg = &node->msg.iotlb; + msg->type = VHOST_IOTLB_MISS; + msg->iova = iova; + msg->perm = access; + + vhost_enqueue_msg(dev, &dev->read_list, node); + + return 0; +} + static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) + { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, sizeof *avail + num * sizeof *avail->ring + s) && @@ -664,11 +1102,59 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static int iotlb_access_ok(struct vhost_virtqueue *vq, + int access, u64 addr, u64 len) +{ + const struct vhost_umem_node *node; + struct vhost_umem *umem = vq->iotlb; + u64 s = 0, size; + + while (len > s) { + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, + addr + len - 1); + if (node == NULL || node->start > addr) { + vhost_iotlb_miss(vq, addr, access); + return false; + } else if (!(node->perm & access)) { + /* Report the possible access violation by + * request another translation from userspace. + */ + return false; + } + + size = node->size - addr + node->start; + s += size; + addr += size; + } + + return true; +} + +int vq_iotlb_prefetch(struct vhost_virtqueue *vq) +{ + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + unsigned int num = vq->num; + + if (!vq->iotlb) + return 1; + + return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, + num * sizeof *vq->desc) && + iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, + sizeof *vq->avail + + num * sizeof *vq->avail->ring + s) && + iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, + sizeof *vq->used + + num * sizeof *vq->used->ring + s); +} +EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); + /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); @@ -679,7 +1165,7 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return vq_memory_access_ok(log_base, vq->memory, + return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -690,33 +1176,36 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { + if (vq->iotlb) { + /* When device IOTLB was used, the access validation + * will be validated during prefetching. + */ + return 1; + } return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) && vq_log_access_ok(vq, vq->log_base); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); -static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) +static struct vhost_umem *vhost_umem_alloc(void) { - const struct vhost_memory_region *r1 = p1, *r2 = p2; - if (r1->guest_phys_addr < r2->guest_phys_addr) - return 1; - if (r1->guest_phys_addr > r2->guest_phys_addr) - return -1; - return 0; -} + struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem)); -static void *vhost_kvzalloc(unsigned long size) -{ - void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!umem) + return NULL; - if (!n) - n = vzalloc(size); - return n; + umem->umem_tree = RB_ROOT; + umem->numem = 0; + INIT_LIST_HEAD(&umem->umem_list); + + return umem; } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { - struct vhost_memory mem, *newmem, *oldmem; + struct vhost_memory mem, *newmem; + struct vhost_memory_region *region; + struct vhost_umem *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; @@ -736,24 +1225,47 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kvfree(newmem); return -EFAULT; } - sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions), - vhost_memory_reg_sort_cmp, NULL); - if (!memory_access_ok(d, newmem, 0)) { + newumem = vhost_umem_alloc(); + if (!newumem) { kvfree(newmem); - return -EFAULT; + return -ENOMEM; } - oldmem = d->memory; - d->memory = newmem; + + for (region = newmem->regions; + region < newmem->regions + mem.nregions; + region++) { + if (vhost_new_umem_range(newumem, + region->guest_phys_addr, + region->memory_size, + region->guest_phys_addr + + region->memory_size - 1, + region->userspace_addr, + VHOST_ACCESS_RW)) + goto err; + } + + if (!memory_access_ok(d, newumem, 0)) + goto err; + + oldumem = d->umem; + d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); - d->vqs[i]->memory = newmem; + d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } - kvfree(oldmem); + + kvfree(newmem); + vhost_umem_clean(oldumem); return 0; + +err: + vhost_umem_clean(newumem); + kvfree(newmem); + return -EFAULT; } long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) @@ -974,6 +1486,30 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) +{ + struct vhost_umem *niotlb, *oiotlb; + int i; + + niotlb = vhost_umem_alloc(); + if (!niotlb) + return -ENOMEM; + + oiotlb = d->iotlb; + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->iotlb = niotlb; + mutex_unlock(&d->vqs[i]->mutex); + } + + vhost_umem_clean(oiotlb); + + return 0; +} +EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); + /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { @@ -1056,28 +1592,6 @@ done: } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); -static const struct vhost_memory_region *find_region(struct vhost_memory *mem, - __u64 addr, __u32 len) -{ - const struct vhost_memory_region *reg; - int start = 0, end = mem->nregions; - - while (start < end) { - int slot = start + (end - start) / 2; - reg = mem->regions + slot; - if (addr >= reg->guest_phys_addr) - end = slot; - else - start = slot + 1; - } - - reg = mem->regions + start; - if (addr >= reg->guest_phys_addr && - reg->guest_phys_addr + reg->memory_size > addr) - return reg; - return NULL; -} - /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/x86/exception-tables.txt. @@ -1156,7 +1670,8 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), + &vq->used->flags) < 0) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ @@ -1174,7 +1689,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq))) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), + vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; @@ -1208,15 +1724,20 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) if (r) goto err; vq->signalled_used_valid = false; - if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { + if (!vq->iotlb && + !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { r = -EFAULT; goto err; } - r = __get_user(last_used_idx, &vq->used->idx); - if (r) + r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + if (r) { + vq_err(vq, "Can't access used idx at %p\n", + &vq->used->idx); goto err; + } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; + err: vq->is_le = is_le; return r; @@ -1224,36 +1745,48 @@ err: EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, - struct iovec iov[], int iov_size) + struct iovec iov[], int iov_size, int access) { - const struct vhost_memory_region *reg; - struct vhost_memory *mem; + const struct vhost_umem_node *node; + struct vhost_dev *dev = vq->dev; + struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0; int ret = 0; - mem = vq->memory; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } - reg = find_region(mem, addr, len); - if (unlikely(!reg)) { - ret = -EFAULT; + + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, addr + len - 1); + if (node == NULL || node->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; + break; + } + ret = -EAGAIN; + break; + } else if (!(node->perm & access)) { + ret = -EPERM; break; } + _iov = iov + ret; - size = reg->memory_size - addr + reg->guest_phys_addr; + size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) - (reg->userspace_addr + addr - reg->guest_phys_addr); + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret; } + if (ret == -EAGAIN) + vhost_iotlb_miss(vq, addr, access); return ret; } @@ -1288,7 +1821,7 @@ static int get_indirect(struct vhost_virtqueue *vq, unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; - int ret; + int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { @@ -1300,9 +1833,10 @@ static int get_indirect(struct vhost_virtqueue *vq, } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, - UIO_MAXIOV); + UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d in indirect.\n", ret); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, READ, vq->indirect, ret, len); @@ -1340,16 +1874,22 @@ static int get_indirect(struct vhost_virtqueue *vq, return -EINVAL; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d indirect idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); return ret; } /* If this is an input descriptor, increment that count. */ - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); @@ -1388,11 +1928,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, u16 last_avail_idx; __virtio16 avail_idx; __virtio16 ring_head; - int ret; + int ret, access; /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(__get_user(avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1414,8 +1954,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(__get_user(ring_head, - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { + if (unlikely(vhost_get_user(vq, ring_head, + &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1450,7 +1990,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, i, vq->num, head); return -EINVAL; } - ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); + ret = vhost_copy_from_user(vq, &desc, vq->desc + i, + sizeof desc); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); @@ -1461,22 +2002,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { - vq_err(vq, "Failure detected " - "in indirect descriptor at idx %d\n", i); + if (ret != -EAGAIN) + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); return ret; } continue; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d descriptor idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); return ret; } - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; @@ -1538,15 +2085,15 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (count == 1) { - if (__put_user(heads[0].id, &used->id)) { + if (vhost_put_user(vq, heads[0].id, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (__put_user(heads[0].len, &used->len)) { + if (vhost_put_user(vq, heads[0].len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } - } else if (__copy_to_user(used, heads, count * sizeof *used)) { + } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1590,7 +2137,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure buffer is written before we update index. */ smp_wmb(); - if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) { + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), + &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } @@ -1622,7 +2170,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; - if (__get_user(flags, &vq->avail->flags)) { + if (vhost_get_user(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -1636,7 +2184,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (unlikely(!v)) return true; - if (__get_user(event, vhost_used_event(vq))) { + if (vhost_get_user(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -1678,7 +2226,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) __virtio16 avail_idx; int r; - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) return false; @@ -1713,7 +2261,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); @@ -1741,6 +2289,47 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_disable_notify); +/* Create a new message. */ +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) +{ + struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + if (!node) + return NULL; + node->vq = vq; + node->msg.type = type; + return node; +} +EXPORT_SYMBOL_GPL(vhost_new_msg); + +void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, + struct vhost_msg_node *node) +{ + spin_lock(&dev->iotlb_lock); + list_add_tail(&node->node, head); + spin_unlock(&dev->iotlb_lock); + + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); +} +EXPORT_SYMBOL_GPL(vhost_enqueue_msg); + +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head) +{ + struct vhost_msg_node *node = NULL; + + spin_lock(&dev->iotlb_lock); + if (!list_empty(head)) { + node = list_first_entry(head, struct vhost_msg_node, + node); + list_del(&node->node); + } + spin_unlock(&dev->iotlb_lock); + + return node; +} +EXPORT_SYMBOL_GPL(vhost_dequeue_msg); + + static int __init vhost_init(void) { return 0; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d36d8beb3351..78f3c5fc02e4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -15,13 +15,15 @@ struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); +#define VHOST_WORK_QUEUED 1 struct vhost_work { - struct list_head node; + struct llist_node node; vhost_work_fn_t fn; wait_queue_head_t done; int flushing; unsigned queue_seq; unsigned done_seq; + unsigned long flags; }; /* Poll a file (eventfd or socket) */ @@ -53,6 +55,27 @@ struct vhost_log { u64 len; }; +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +struct vhost_umem_node { + struct rb_node rb; + struct list_head link; + __u64 start; + __u64 last; + __u64 size; + __u64 userspace_addr; + __u32 perm; + __u32 flags_padding; + __u64 __subtree_last; +}; + +struct vhost_umem { + struct rb_root umem_tree; + struct list_head umem_list; + int numem; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -98,10 +121,12 @@ struct vhost_virtqueue { u64 log_addr; struct iovec iov[UIO_MAXIOV]; + struct iovec iotlb_iov[64]; struct iovec *indirect; struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ - struct vhost_memory *memory; + struct vhost_umem *umem; + struct vhost_umem *iotlb; void *private_data; u64 acked_features; /* Log write descriptors */ @@ -118,25 +143,35 @@ struct vhost_virtqueue { u32 busyloop_timeout; }; +struct vhost_msg_node { + struct vhost_msg msg; + struct vhost_virtqueue *vq; + struct list_head node; +}; + struct vhost_dev { - struct vhost_memory *memory; struct mm_struct *mm; struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; - spinlock_t work_lock; - struct list_head work_list; + struct llist_head work_list; struct task_struct *worker; + struct vhost_umem *umem; + struct vhost_umem *iotlb; + spinlock_t iotlb_lock; + struct list_head read_list; + struct list_head pending_list; + wait_queue_head_t wait; }; void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); -struct vhost_memory *vhost_dev_reset_owner_prepare(void); -void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); +struct vhost_umem *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); @@ -165,6 +200,21 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); +int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); +void vhost_enqueue_msg(struct vhost_dev *dev, + struct list_head *head, + struct vhost_msg_node *node); +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head); +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait); +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock); +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c new file mode 100644 index 000000000000..0ddf3a2dbfc4 --- /dev/null +++ b/drivers/vhost/vsock.c @@ -0,0 +1,719 @@ +/* + * vhost transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He <asias@redhat.com> + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include <linux/miscdevice.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <net/sock.h> +#include <linux/virtio_vsock.h> +#include <linux/vhost.h> + +#include <net/af_vsock.h> +#include "vhost.h" + +#define VHOST_VSOCK_DEFAULT_HOST_CID 2 + +enum { + VHOST_VSOCK_FEATURES = VHOST_FEATURES, +}; + +/* Used to track all the vhost_vsock instances on the system. */ +static DEFINE_SPINLOCK(vhost_vsock_lock); +static LIST_HEAD(vhost_vsock_list); + +struct vhost_vsock { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + + /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ + struct list_head list; + + struct vhost_work send_pkt_work; + spinlock_t send_pkt_list_lock; + struct list_head send_pkt_list; /* host->guest pending packets */ + + atomic_t queued_replies; + + u32 guest_cid; +}; + +static u32 vhost_transport_get_local_cid(void) +{ + return VHOST_VSOCK_DEFAULT_HOST_CID; +} + +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +{ + struct vhost_vsock *vsock; + + spin_lock_bh(&vhost_vsock_lock); + list_for_each_entry(vsock, &vhost_vsock_list, list) { + u32 other_cid = vsock->guest_cid; + + /* Skip instances that have no CID yet */ + if (other_cid == 0) + continue; + + if (other_cid == guest_cid) { + spin_unlock_bh(&vhost_vsock_lock); + return vsock; + } + } + spin_unlock_bh(&vhost_vsock_lock); + + return NULL; +} + +static void +vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + struct vhost_virtqueue *vq) +{ + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + bool added = false; + bool restart_tx = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + /* Avoid further vmexits, we're already processing the virtqueue */ + vhost_disable_notify(&vsock->dev, vq); + + for (;;) { + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + unsigned out, in; + size_t nbytes; + size_t len; + int head; + + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); + vhost_enable_notify(&vsock->dev, vq); + break; + } + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + if (head == vq->num) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + /* We cannot finish yet if more buffers snuck in while + * re-enabling notify. + */ + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + if (out) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Expected 0 output buffers, got %u\n", out); + break; + } + + len = iov_length(&vq->iov[out], in); + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + + nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt hdr\n"); + break; + } + + nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } + + vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + added = true; + + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx processing? */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); + + if (restart_tx) + vhost_poll_queue(&tx_vq->poll); +} + +static void vhost_transport_send_pkt_work(struct vhost_work *work) +{ + struct vhost_virtqueue *vq; + struct vhost_vsock *vsock; + + vsock = container_of(work, struct vhost_vsock, send_pkt_work); + vq = &vsock->vqs[VSOCK_VQ_RX]; + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int +vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct vhost_vsock *vsock; + struct vhost_virtqueue *vq; + int len = pkt->len; + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); + if (!vsock) { + virtio_transport_free_pkt(pkt); + return -ENODEV; + } + + vq = &vsock->vqs[VSOCK_VQ_RX]; + + if (pkt->reply) + atomic_inc(&vsock->queued_replies); + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add_tail(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + return len; +} + +static struct virtio_vsock_pkt * +vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, + unsigned int out, unsigned int in) +{ + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + size_t nbytes; + size_t len; + + if (in != 0) { + vq_err(vq, "Expected 0 input buffers, got %u\n", in); + return NULL; + } + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + len = iov_length(vq->iov, out); + iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); + + nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", + sizeof(pkt->hdr), nbytes); + kfree(pkt); + return NULL; + } + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) + pkt->len = le32_to_cpu(pkt->hdr.len); + + /* No payload */ + if (!pkt->len) + return pkt; + + /* The pkt is too big */ + if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + kfree(pkt); + return NULL; + } + + pkt->buf = kmalloc(pkt->len, GFP_KERNEL); + if (!pkt->buf) { + kfree(pkt); + return NULL; + } + + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + vq_err(vq, "Expected %u byte payload, got %zu bytes\n", + pkt->len, nbytes); + virtio_transport_free_pkt(pkt); + return NULL; + } + + return pkt; +} + +/* Is there space left for replies to rx packets? */ +static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; + int val; + + smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ + val = atomic_read(&vsock->queued_replies); + + return val < vq->num; +} + +static void vhost_vsock_handle_tx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + struct virtio_vsock_pkt *pkt; + int head; + unsigned int out, in; + bool added = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + vhost_disable_notify(&vsock->dev, vq); + for (;;) { + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. Leave tx virtqueue + * callbacks disabled. + */ + goto no_more_replies; + } + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) + break; + + if (head == vq->num) { + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + pkt = vhost_vsock_alloc_pkt(vq, out, in); + if (!pkt) { + vq_err(vq, "Faulted on pkt\n"); + continue; + } + + /* Only accept correctly addressed packets */ + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); + + vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + added = true; + } + +no_more_replies: + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); +} + +static void vhost_vsock_handle_rx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int vhost_vsock_start(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + + if (!vhost_vq_access_ok(vq)) { + ret = -EFAULT; + mutex_unlock(&vq->mutex); + goto err_vq; + } + + if (!vq->private_data) { + vq->private_data = vsock; + vhost_vq_init_access(vq); + } + + mutex_unlock(&vq->mutex); + } + + mutex_unlock(&vsock->dev.mutex); + return 0; + +err_vq: + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static int vhost_vsock_stop(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } + +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static void vhost_vsock_free(struct vhost_vsock *vsock) +{ + kvfree(vsock); +} + +static int vhost_vsock_dev_open(struct inode *inode, struct file *file) +{ + struct vhost_virtqueue **vqs; + struct vhost_vsock *vsock; + int ret; + + /* This struct is large and allocation could fail, fall back to vmalloc + * if there is no other way. + */ + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!vsock) { + vsock = vmalloc(sizeof(*vsock)); + if (!vsock) + return -ENOMEM; + } + + vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + ret = -ENOMEM; + goto out; + } + + atomic_set(&vsock->queued_replies, 0); + + vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; + vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; + vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; + vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; + + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + + file->private_data = vsock; + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); + + spin_lock_bh(&vhost_vsock_lock); + list_add_tail(&vsock->list, &vhost_vsock_list); + spin_unlock_bh(&vhost_vsock_lock); + return 0; + +out: + vhost_vsock_free(vsock); + return ret; +} + +static void vhost_vsock_flush(struct vhost_vsock *vsock) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) + if (vsock->vqs[i].handle_kick) + vhost_poll_flush(&vsock->vqs[i].poll); + vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); +} + +static void vhost_vsock_reset_orphans(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + + if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + } +} + +static int vhost_vsock_dev_release(struct inode *inode, struct file *file) +{ + struct vhost_vsock *vsock = file->private_data; + + spin_lock_bh(&vhost_vsock_lock); + list_del(&vsock->list); + spin_unlock_bh(&vhost_vsock_lock); + + /* Iterating over all connections for all CIDs to find orphans is + * inefficient. Room for improvement here. */ + vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + + vhost_vsock_stop(vsock); + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + + spin_lock_bh(&vsock->send_pkt_list_lock); + while (!list_empty(&vsock->send_pkt_list)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_dev_cleanup(&vsock->dev, false); + kfree(vsock->dev.vqs); + vhost_vsock_free(vsock); + return 0; +} + +static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) +{ + struct vhost_vsock *other; + + /* Refuse reserved CIDs */ + if (guest_cid <= VMADDR_CID_HOST || + guest_cid == U32_MAX) + return -EINVAL; + + /* 64-bit CIDs are not yet supported */ + if (guest_cid > U32_MAX) + return -EINVAL; + + /* Refuse if CID is already in use */ + other = vhost_vsock_get(guest_cid); + if (other && other != vsock) + return -EADDRINUSE; + + spin_lock_bh(&vhost_vsock_lock); + vsock->guest_cid = guest_cid; + spin_unlock_bh(&vhost_vsock_lock); + + return 0; +} + +static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + if (features & ~VHOST_VSOCK_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vsock->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vsock->dev)) { + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&vsock->dev.mutex); + return 0; +} + +static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_vsock *vsock = f->private_data; + void __user *argp = (void __user *)arg; + u64 guest_cid; + u64 features; + int start; + int r; + + switch (ioctl) { + case VHOST_VSOCK_SET_GUEST_CID: + if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) + return -EFAULT; + return vhost_vsock_set_cid(vsock, guest_cid); + case VHOST_VSOCK_SET_RUNNING: + if (copy_from_user(&start, argp, sizeof(start))) + return -EFAULT; + if (start) + return vhost_vsock_start(vsock); + else + return vhost_vsock_stop(vsock); + case VHOST_GET_FEATURES: + features = VHOST_VSOCK_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + return vhost_vsock_set_features(vsock, features); + default: + mutex_lock(&vsock->dev.mutex); + r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); + if (r == -ENOIOCTLCMD) + r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); + else + vhost_vsock_flush(vsock); + mutex_unlock(&vsock->dev.mutex); + return r; + } +} + +static const struct file_operations vhost_vsock_fops = { + .owner = THIS_MODULE, + .open = vhost_vsock_dev_open, + .release = vhost_vsock_dev_release, + .llseek = noop_llseek, + .unlocked_ioctl = vhost_vsock_dev_ioctl, +}; + +static struct miscdevice vhost_vsock_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhost-vsock", + .fops = &vhost_vsock_fops, +}; + +static struct virtio_transport vhost_transport = { + .transport = { + .get_local_cid = vhost_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_bind = virtio_transport_dgram_bind, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = vhost_transport_send_pkt, +}; + +static int __init vhost_vsock_init(void) +{ + int ret; + + ret = vsock_core_init(&vhost_transport.transport); + if (ret < 0) + return ret; + return misc_register(&vhost_vsock_misc); +}; + +static void __exit vhost_vsock_exit(void) +{ + misc_deregister(&vhost_vsock_misc); + vsock_core_exit(); +}; + +module_init(vhost_vsock_init); +module_exit(vhost_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/video/fbdev/bfin_adv7393fb.c b/drivers/video/fbdev/bfin_adv7393fb.c index 8fe41caac38e..e2d7d039ce3b 100644 --- a/drivers/video/fbdev/bfin_adv7393fb.c +++ b/drivers/video/fbdev/bfin_adv7393fb.c @@ -10,6 +10,8 @@ * TODO: Code Cleanup */ +#define DRIVER_NAME "bfin-adv7393" + #define pr_fmt(fmt) DRIVER_NAME ": " fmt #include <linux/module.h> diff --git a/drivers/video/fbdev/bfin_adv7393fb.h b/drivers/video/fbdev/bfin_adv7393fb.h index cd591b5152a5..afd0380e19e1 100644 --- a/drivers/video/fbdev/bfin_adv7393fb.h +++ b/drivers/video/fbdev/bfin_adv7393fb.h @@ -59,8 +59,6 @@ enum { BLANK_OFF, }; -#define DRIVER_NAME "bfin-adv7393" - struct adv7393fb_modes { const s8 name[25]; /* Full name */ u16 xres; /* Active Horizonzal Pixels */ diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c index 2fb90cb6803f..1d7c012f09db 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c @@ -1332,7 +1332,7 @@ static void omapfb_free_fbmem(struct fb_info *fbi) } dma_free_attrs(fbdev->dev, rg->size, rg->token, rg->dma_handle, - &rg->attrs); + rg->attrs); rg->token = NULL; rg->vaddr = NULL; @@ -1370,7 +1370,7 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, struct omapfb2_device *fbdev = ofbi->fbdev; struct omapfb2_mem_region *rg; void *token; - DEFINE_DMA_ATTRS(attrs); + unsigned long attrs; dma_addr_t dma_handle; int r; @@ -1386,15 +1386,15 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, size = PAGE_ALIGN(size); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + attrs = DMA_ATTR_WRITE_COMBINE; if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + attrs |= DMA_ATTR_NO_KERNEL_MAPPING; DBG("allocating %lu bytes for fb %d\n", size, ofbi->id); token = dma_alloc_attrs(fbdev->dev, size, &dma_handle, - GFP_KERNEL, &attrs); + GFP_KERNEL, attrs); if (token == NULL) { dev_err(fbdev->dev, "failed to allocate framebuffer\n"); @@ -1408,7 +1408,7 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, r = omap_vrfb_request_ctx(&rg->vrfb); if (r) { dma_free_attrs(fbdev->dev, size, token, dma_handle, - &attrs); + attrs); dev_err(fbdev->dev, "vrfb create ctx failed\n"); return r; } diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb.h b/drivers/video/fbdev/omap2/omapfb/omapfb.h index bcb9ff4a607d..555487d6dbea 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb.h +++ b/drivers/video/fbdev/omap2/omapfb/omapfb.h @@ -28,7 +28,6 @@ #endif #include <linux/rwsem.h> -#include <linux/dma-attrs.h> #include <linux/dma-mapping.h> #include <video/omapfb_dss.h> @@ -51,7 +50,7 @@ extern bool omapfb_debug; struct omapfb2_mem_region { int id; - struct dma_attrs attrs; + unsigned long attrs; void *token; dma_addr_t dma_handle; u32 paddr; diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index 10fbfd8ab963..b6bc4a0bda2a 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -36,11 +36,11 @@ static int __init fb_logo_late_init(void) late_initcall(fb_logo_late_init); -/* logo's are marked __initdata. Use __init_refok to tell +/* logo's are marked __initdata. Use __ref to tell * modpost that it is intended that this function uses data * marked __initdata. */ -const struct linux_logo * __init_refok fb_find_logo(int depth) +const struct linux_logo * __ref fb_find_logo(int depth) { const struct linux_logo *logo = NULL; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 888d5f8322ce..4e7003db12c4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -207,6 +207,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) num = min(num, ARRAY_SIZE(vb->pfns)); mutex_lock(&vb->balloon_lock); + /* We can't release more pages than taken */ + num = min(num, (size_t)vb->num_pages); for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { page = balloon_page_dequeue(vb_dev_info); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ca6bfddaacad..114a0c88afb8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -117,7 +117,10 @@ struct vring_virtqueue { #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) /* - * The interaction between virtio and a possible IOMMU is a mess. + * Modern virtio devices have feature bits to specify whether they need a + * quirk and bypass the IOMMU. If not there, just use the DMA API. + * + * If there, the interaction between virtio and DMA API is messy. * * On most systems with virtio, physical addresses match bus addresses, * and it doesn't particularly matter whether we use the DMA API. @@ -133,10 +136,18 @@ struct vring_virtqueue { * * For the time being, we preserve historic behavior and bypass the DMA * API. + * + * TODO: install a per-device DMA ops structure that does the right thing + * taking into account all the above quirks, and use the DMA API + * unconditionally on data path. */ static bool vring_use_dma_api(struct virtio_device *vdev) { + if (!virtio_has_iommu_quirk(vdev)) + return true; + + /* Otherwise, we are left to guess. */ /* * In theory, it's possible to have a buggy QEMU-supposed * emulated Q35 IOMMU and Xen enabled at the same time. On @@ -1099,6 +1110,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; + case VIRTIO_F_IOMMU_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index a2eec97d5064..bb09de633939 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -390,8 +390,6 @@ static int hdq_read_byte(struct hdq_data *hdq_data, u8 *val) goto out; } - hdq_data->hdq_irqstatus = 0; - if (!(hdq_data->hdq_irqstatus & OMAP_HDQ_INT_STATUS_RXCOMPLETE)) { hdq_reg_merge(hdq_data, OMAP_HDQ_CTRL_STATUS, OMAP_HDQ_CTRL_STATUS_DIR | OMAP_HDQ_CTRL_STATUS_GO, diff --git a/drivers/w1/slaves/w1_ds2406.c b/drivers/w1/slaves/w1_ds2406.c index d488961a8c90..51f2f66d6555 100644 --- a/drivers/w1/slaves/w1_ds2406.c +++ b/drivers/w1/slaves/w1_ds2406.c @@ -153,16 +153,4 @@ static struct w1_family w1_family_12 = { .fid = W1_FAMILY_DS2406, .fops = &w1_f12_fops, }; - -static int __init w1_f12_init(void) -{ - return w1_register_family(&w1_family_12); -} - -static void __exit w1_f12_exit(void) -{ - w1_unregister_family(&w1_family_12); -} - -module_init(w1_f12_init); -module_exit(w1_f12_exit); +module_w1_family(w1_family_12); diff --git a/drivers/w1/slaves/w1_ds2408.c b/drivers/w1/slaves/w1_ds2408.c index 7dfa0e11688a..aec5958e66e9 100644 --- a/drivers/w1/slaves/w1_ds2408.c +++ b/drivers/w1/slaves/w1_ds2408.c @@ -351,16 +351,4 @@ static struct w1_family w1_family_29 = { .fid = W1_FAMILY_DS2408, .fops = &w1_f29_fops, }; - -static int __init w1_f29_init(void) -{ - return w1_register_family(&w1_family_29); -} - -static void __exit w1_f29_exit(void) -{ - w1_unregister_family(&w1_family_29); -} - -module_init(w1_f29_init); -module_exit(w1_f29_exit); +module_w1_family(w1_family_29); diff --git a/drivers/w1/slaves/w1_ds2413.c b/drivers/w1/slaves/w1_ds2413.c index ee28fc1ff390..f2e1c51533b9 100644 --- a/drivers/w1/slaves/w1_ds2413.c +++ b/drivers/w1/slaves/w1_ds2413.c @@ -135,16 +135,4 @@ static struct w1_family w1_family_3a = { .fid = W1_FAMILY_DS2413, .fops = &w1_f3a_fops, }; - -static int __init w1_f3a_init(void) -{ - return w1_register_family(&w1_family_3a); -} - -static void __exit w1_f3a_exit(void) -{ - w1_unregister_family(&w1_family_3a); -} - -module_init(w1_f3a_init); -module_exit(w1_f3a_exit); +module_w1_family(w1_family_3a); diff --git a/drivers/w1/slaves/w1_ds2423.c b/drivers/w1/slaves/w1_ds2423.c index 7e41b7d91fb5..4ab54fd9dde2 100644 --- a/drivers/w1/slaves/w1_ds2423.c +++ b/drivers/w1/slaves/w1_ds2423.c @@ -138,19 +138,7 @@ static struct w1_family w1_family_1d = { .fid = W1_COUNTER_DS2423, .fops = &w1_f1d_fops, }; - -static int __init w1_f1d_init(void) -{ - return w1_register_family(&w1_family_1d); -} - -static void __exit w1_f1d_exit(void) -{ - w1_unregister_family(&w1_family_1d); -} - -module_init(w1_f1d_init); -module_exit(w1_f1d_exit); +module_w1_family(w1_family_1d); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mika Laitio <lamikr@pilppa.org>"); diff --git a/drivers/w1/slaves/w1_ds2431.c b/drivers/w1/slaves/w1_ds2431.c index 9c4ff9d28adc..80572cb63ba8 100644 --- a/drivers/w1/slaves/w1_ds2431.c +++ b/drivers/w1/slaves/w1_ds2431.c @@ -288,19 +288,7 @@ static struct w1_family w1_family_2d = { .fid = W1_EEPROM_DS2431, .fops = &w1_f2d_fops, }; - -static int __init w1_f2d_init(void) -{ - return w1_register_family(&w1_family_2d); -} - -static void __exit w1_f2d_fini(void) -{ - w1_unregister_family(&w1_family_2d); -} - -module_init(w1_f2d_init); -module_exit(w1_f2d_fini); +module_w1_family(w1_family_2d); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bernhard Weirich <bernhard.weirich@riedel.net>"); diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index 72319a968a9e..6cf378c89ecb 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -305,16 +305,4 @@ static struct w1_family w1_family_23 = { .fid = W1_EEPROM_DS2433, .fops = &w1_f23_fops, }; - -static int __init w1_f23_init(void) -{ - return w1_register_family(&w1_family_23); -} - -static void __exit w1_f23_fini(void) -{ - w1_unregister_family(&w1_family_23); -} - -module_init(w1_f23_init); -module_exit(w1_f23_fini); +module_w1_family(w1_family_23); diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c index d9079d48d112..ffa37f773b3b 100644 --- a/drivers/w1/slaves/w1_ds2760.c +++ b/drivers/w1/slaves/w1_ds2760.c @@ -121,25 +121,14 @@ static const struct attribute_group *w1_ds2760_groups[] = { NULL, }; -static DEFINE_IDA(bat_ida); - static int w1_ds2760_add_slave(struct w1_slave *sl) { int ret; - int id; struct platform_device *pdev; - id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL); - if (id < 0) { - ret = id; - goto noid; - } - - pdev = platform_device_alloc("ds2760-battery", id); - if (!pdev) { - ret = -ENOMEM; - goto pdev_alloc_failed; - } + pdev = platform_device_alloc("ds2760-battery", PLATFORM_DEVID_AUTO); + if (!pdev) + return -ENOMEM; pdev->dev.parent = &sl->dev; ret = platform_device_add(pdev); @@ -148,24 +137,19 @@ static int w1_ds2760_add_slave(struct w1_slave *sl) dev_set_drvdata(&sl->dev, pdev); - goto success; + return 0; pdev_add_failed: platform_device_put(pdev); -pdev_alloc_failed: - ida_simple_remove(&bat_ida, id); -noid: -success: + return ret; } static void w1_ds2760_remove_slave(struct w1_slave *sl) { struct platform_device *pdev = dev_get_drvdata(&sl->dev); - int id = pdev->id; platform_device_unregister(pdev); - ida_simple_remove(&bat_ida, id); } static struct w1_family_ops w1_ds2760_fops = { @@ -178,28 +162,13 @@ static struct w1_family w1_ds2760_family = { .fid = W1_FAMILY_DS2760, .fops = &w1_ds2760_fops, }; - -static int __init w1_ds2760_init(void) -{ - pr_info("1-Wire driver for the DS2760 battery monitor chip - (c) 2004-2005, Szabolcs Gyurko\n"); - ida_init(&bat_ida); - return w1_register_family(&w1_ds2760_family); -} - -static void __exit w1_ds2760_exit(void) -{ - w1_unregister_family(&w1_ds2760_family); - ida_destroy(&bat_ida); -} +module_w1_family(w1_ds2760_family); EXPORT_SYMBOL(w1_ds2760_read); EXPORT_SYMBOL(w1_ds2760_write); EXPORT_SYMBOL(w1_ds2760_store_eeprom); EXPORT_SYMBOL(w1_ds2760_recall_eeprom); -module_init(w1_ds2760_init); -module_exit(w1_ds2760_exit); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>"); MODULE_DESCRIPTION("1-wire Driver Dallas 2760 battery monitor chip"); diff --git a/drivers/w1/slaves/w1_ds2780.c b/drivers/w1/slaves/w1_ds2780.c index 50e85f7929d4..f5c2aa429a92 100644 --- a/drivers/w1/slaves/w1_ds2780.c +++ b/drivers/w1/slaves/w1_ds2780.c @@ -113,25 +113,14 @@ static const struct attribute_group *w1_ds2780_groups[] = { NULL, }; -static DEFINE_IDA(bat_ida); - static int w1_ds2780_add_slave(struct w1_slave *sl) { int ret; - int id; struct platform_device *pdev; - id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL); - if (id < 0) { - ret = id; - goto noid; - } - - pdev = platform_device_alloc("ds2780-battery", id); - if (!pdev) { - ret = -ENOMEM; - goto pdev_alloc_failed; - } + pdev = platform_device_alloc("ds2780-battery", PLATFORM_DEVID_AUTO); + if (!pdev) + return -ENOMEM; pdev->dev.parent = &sl->dev; ret = platform_device_add(pdev); @@ -144,19 +133,15 @@ static int w1_ds2780_add_slave(struct w1_slave *sl) pdev_add_failed: platform_device_put(pdev); -pdev_alloc_failed: - ida_simple_remove(&bat_ida, id); -noid: + return ret; } static void w1_ds2780_remove_slave(struct w1_slave *sl) { struct platform_device *pdev = dev_get_drvdata(&sl->dev); - int id = pdev->id; platform_device_unregister(pdev); - ida_simple_remove(&bat_ida, id); } static struct w1_family_ops w1_ds2780_fops = { @@ -169,21 +154,7 @@ static struct w1_family w1_ds2780_family = { .fid = W1_FAMILY_DS2780, .fops = &w1_ds2780_fops, }; - -static int __init w1_ds2780_init(void) -{ - ida_init(&bat_ida); - return w1_register_family(&w1_ds2780_family); -} - -static void __exit w1_ds2780_exit(void) -{ - w1_unregister_family(&w1_ds2780_family); - ida_destroy(&bat_ida); -} - -module_init(w1_ds2780_init); -module_exit(w1_ds2780_exit); +module_w1_family(w1_ds2780_family); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Clifton Barnes <cabarnes@indesign-llc.com>"); diff --git a/drivers/w1/slaves/w1_ds2781.c b/drivers/w1/slaves/w1_ds2781.c index 1eb98fb1688d..9c03e014cf9e 100644 --- a/drivers/w1/slaves/w1_ds2781.c +++ b/drivers/w1/slaves/w1_ds2781.c @@ -17,7 +17,6 @@ #include <linux/types.h> #include <linux/platform_device.h> #include <linux/mutex.h> -#include <linux/idr.h> #include "../w1.h" #include "../w1_int.h" @@ -111,25 +110,14 @@ static const struct attribute_group *w1_ds2781_groups[] = { NULL, }; -static DEFINE_IDA(bat_ida); - static int w1_ds2781_add_slave(struct w1_slave *sl) { int ret; - int id; struct platform_device *pdev; - id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL); - if (id < 0) { - ret = id; - goto noid; - } - - pdev = platform_device_alloc("ds2781-battery", id); - if (!pdev) { - ret = -ENOMEM; - goto pdev_alloc_failed; - } + pdev = platform_device_alloc("ds2781-battery", PLATFORM_DEVID_AUTO); + if (!pdev) + return -ENOMEM; pdev->dev.parent = &sl->dev; ret = platform_device_add(pdev); @@ -142,19 +130,15 @@ static int w1_ds2781_add_slave(struct w1_slave *sl) pdev_add_failed: platform_device_put(pdev); -pdev_alloc_failed: - ida_simple_remove(&bat_ida, id); -noid: + return ret; } static void w1_ds2781_remove_slave(struct w1_slave *sl) { struct platform_device *pdev = dev_get_drvdata(&sl->dev); - int id = pdev->id; platform_device_unregister(pdev); - ida_simple_remove(&bat_ida, id); } static struct w1_family_ops w1_ds2781_fops = { @@ -167,21 +151,7 @@ static struct w1_family w1_ds2781_family = { .fid = W1_FAMILY_DS2781, .fops = &w1_ds2781_fops, }; - -static int __init w1_ds2781_init(void) -{ - ida_init(&bat_ida); - return w1_register_family(&w1_ds2781_family); -} - -static void __exit w1_ds2781_exit(void) -{ - w1_unregister_family(&w1_ds2781_family); - ida_destroy(&bat_ida); -} - -module_init(w1_ds2781_init); -module_exit(w1_ds2781_exit); +module_w1_family(w1_ds2781_family); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Renata Sayakhova <renata@oktetlabs.ru>"); diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c index 365d6dff21de..5e348d38ec5c 100644 --- a/drivers/w1/slaves/w1_ds28e04.c +++ b/drivers/w1/slaves/w1_ds28e04.c @@ -427,16 +427,4 @@ static struct w1_family w1_family_1C = { .fid = W1_FAMILY_DS28E04, .fops = &w1_f1C_fops, }; - -static int __init w1_f1C_init(void) -{ - return w1_register_family(&w1_family_1C); -} - -static void __exit w1_f1C_fini(void) -{ - w1_unregister_family(&w1_family_1C); -} - -module_init(w1_f1C_init); -module_exit(w1_f1C_fini); +module_w1_family(w1_family_1C); diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h index ed5dcb80a1f7..10a7a0767187 100644 --- a/drivers/w1/w1_family.h +++ b/drivers/w1/w1_family.h @@ -88,4 +88,16 @@ struct w1_family * w1_family_registered(u8); void w1_unregister_family(struct w1_family *); int w1_register_family(struct w1_family *); +/** + * module_w1_driver() - Helper macro for registering a 1-Wire families + * @__w1_family: w1_family struct + * + * Helper macro for 1-Wire families which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_w1_family(__w1_family) \ + module_driver(__w1_family, w1_register_family, \ + w1_unregister_family) + #endif /* __W1_FAMILY_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 7399782c0998..87e6035c9e81 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -294,7 +294,7 @@ error: void * xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; int order = get_order(size); @@ -346,7 +346,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); void xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dev_addr, struct dma_attrs *attrs) + dma_addr_t dev_addr, unsigned long attrs) { int order = get_order(size); phys_addr_t phys; @@ -378,7 +378,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t map, phys = page_to_phys(page) + offset; dma_addr_t dev_addr = xen_phys_to_bus(phys); @@ -434,7 +434,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); */ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = xen_bus_to_phys(dev_addr); @@ -462,7 +462,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { xen_unmap_single(hwdev, dev_addr, size, dir, attrs); } @@ -538,7 +538,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_device); int xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -599,7 +599,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); void xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; |