diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-12 10:35:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-12 10:35:23 -0800 |
commit | c9193f48e94deaeff0c9abbc67b9584e8ddc42ed (patch) | |
tree | fb50432abb9783a3a78a079e2b142bf3206ea1ca | |
parent | d3c810803576d867265277df8e94eee386351c9d (diff) | |
parent | d85bd8233fff000567cda4e108112bcb33478616 (diff) | |
download | lwn-c9193f48e94deaeff0c9abbc67b9584e8ddc42ed.tar.gz lwn-c9193f48e94deaeff0c9abbc67b9584e8ddc42ed.zip |
Merge tag 'for-5.17/drivers-2022-01-11' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- mtip32xx pci cleanups (Bjorn)
- mtip32xx conversion to generic power management (Vaibhav)
- rsxx pci powermanagement cleanups (Bjorn)
- Remove the rsxx driver. This hardware never saw much adoption, and
it's been end of lifed for a while. (Christoph)
- MD pull request from Song:
- REQ_NOWAIT support (Vishal Verma)
- raid6 benchmark optimization (Dirk Müller)
- Fix for acct bioset (Xiao Ni)
- Clean up max_queued_requests (Mariusz Tkaczyk)
- PREEMPT_RT optimization (Davidlohr Bueso)
- Use default_groups in kobj_type (Greg Kroah-Hartman)
- Use attribute groups in pktcdvd and rnbd (Greg)
- NVMe pull request from Christoph:
- increment request genctr on completion (Keith Busch, Geliang
Tang)
- add a 'iopolicy' module parameter (Hannes Reinecke)
- print out valid arguments when reading from /dev/nvme-fabrics
(Hannes Reinecke)
- Use struct_group() in drbd (Kees)
- null_blk fixes (Ming)
- Get rid of congestion logic in pktcdvd (Neil)
- Floppy ejection hang fix (Tasos)
- Floppy max user request size fix (Xiongwei)
- Loop locking fix (Tetsuo)
* tag 'for-5.17/drivers-2022-01-11' of git://git.kernel.dk/linux-block: (32 commits)
md: use default_groups in kobj_type
md: Move alloc/free acct bioset in to personality
lib/raid6: Use strict priority ranking for pq gen() benchmarking
lib/raid6: skip benchmark of non-chosen xor_syndrome functions
md: fix spelling of "its"
md: raid456 add nowait support
md: raid10 add nowait support
md: raid1 add nowait support
md: add support for REQ_NOWAIT
md: drop queue limitation for RAID1 and RAID10
md/raid5: play nice with PREEMPT_RT
block/rnbd-clt-sysfs: use default_groups in kobj_type
pktcdvd: convert to use attribute groups
block: null_blk: only set set->nr_maps as 3 if active poll_queues is > 0
nvme: add 'iopolicy' module parameter
nvme: drop unused variable ctrl in nvme_setup_cmd
nvme: increment request genctr on completion
nvme-fabrics: print out valid arguments when reading from /dev/nvme-fabrics
block: remove the rsxx driver
rsxx: Drop PCI legacy power management
...
40 files changed, 612 insertions, 4501 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a31b5e4c4ab7..009885cf0d21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7489,12 +7489,6 @@ F: Documentation/firmware_class/ F: drivers/base/firmware_loader/ F: include/linux/firmware.h -FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) -M: Joshua Morris <josh.h.morris@us.ibm.com> -M: Philip Kelleher <pjk1939@linux.ibm.com> -S: Maintained -F: drivers/block/rsxx/ - FLEXTIMER FTM-QUADDEC DRIVER M: Patrick Havelange <patrick.havelange@essensium.com> L: linux-iio@vger.kernel.org diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 2a51dfb09c8f..519b6d38d4df 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -392,17 +392,6 @@ config BLK_DEV_RBD If unsure, say N. -config BLK_DEV_RSXX - tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" - depends on PCI - select CRC32 - help - Device driver for IBM's high speed PCIe SSD - storage device: Flash Adapter 900GB Full Height. - - To compile this driver as a module, choose M here: the - module will be called rsxx. - source "drivers/block/rnbd/Kconfig" endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 11a74f17c9ad..934a9c7c3a7c 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -34,7 +34,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ -obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 07b3c6093e7d..6f450816c4fa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -729,7 +729,8 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device) cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; /* initialize verify_alg and csums_alg */ - memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX); + memset(&p->algs, 0, sizeof(p->algs)); if (get_ldev(peer_device->device)) { dc = rcu_dereference(peer_device->device->ldev->disk_conf); diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index dea59c92ecc1..a882b65ab5d2 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -283,8 +283,10 @@ struct p_rs_param_89 { struct p_rs_param_95 { u32 resync_rate; - char verify_alg[SHARED_SECRET_MAX]; - char csums_alg[SHARED_SECRET_MAX]; + struct_group(algs, + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; + ); u32 c_plan_ahead; u32 c_delay_target; u32 c_fill_target; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1f740e42e457..6df2539e215b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3921,7 +3921,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i /* initialize verify_alg and csums_alg */ p = pi->data; - memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX); + memset(&p->algs, 0, sizeof(p->algs)); err = drbd_recv_all(peer_device->connection, p, header_size); if (err) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 0c638de25023..e611411a934c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1015,7 +1015,7 @@ static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn); static void cancel_activity(void) { do_floppy = NULL; - cancel_delayed_work_sync(&fd_timer); + cancel_delayed_work(&fd_timer); cancel_work_sync(&floppy_work); } @@ -3081,6 +3081,8 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } +#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) + static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) { @@ -3108,7 +3110,7 @@ loop: ptr->resultcode = 0; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { - if (ptr->length <= 0) + if (ptr->length <= 0 || ptr->length >= MAX_LEN) return -EINVAL; ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length); fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e98ddf08d77d..b1b05c45c07c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1082,13 +1082,10 @@ out_putf: return error; } -static int __loop_clr_fd(struct loop_device *lo, bool release) +static void __loop_clr_fd(struct loop_device *lo) { - struct file *filp = NULL; + struct file *filp; gfp_t gfp = lo->old_gfp_mask; - int err = 0; - bool partscan = false; - int lo_number; struct loop_worker *pos, *worker; /* @@ -1103,17 +1100,14 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) * became visible. */ + /* + * Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close() + * after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if + * lo->lo_state has changed while waiting for lo->lo_mutex. + */ mutex_lock(&lo->lo_mutex); - if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { - err = -ENXIO; - goto out_unlock; - } - - filp = lo->lo_backing_file; - if (filp == NULL) { - err = -EINVAL; - goto out_unlock; - } + BUG_ON(lo->lo_state != Lo_rundown); + mutex_unlock(&lo->lo_mutex); if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags)) blk_queue_write_cache(lo->lo_queue, false, false); @@ -1134,6 +1128,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) del_timer_sync(&lo->timer); spin_lock_irq(&lo->lo_lock); + filp = lo->lo_backing_file; lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); @@ -1149,60 +1144,59 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) /* let user-space know about this change */ kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); mapping_set_gfp_mask(filp->f_mapping, gfp); - /* This is safe: open() is still holding a reference. */ - module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); - partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - lo_number = lo->lo_number; disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); -out_unlock: - mutex_unlock(&lo->lo_mutex); - if (partscan) { - /* - * open_mutex has been held already in release path, so don't - * acquire it if this function is called in such case. - * - * If the reread partition isn't from release path, lo_refcnt - * must be at least one and it can only become zero when the - * current holder is released. - */ - if (!release) - mutex_lock(&lo->lo_disk->open_mutex); + + if (lo->lo_flags & LO_FLAGS_PARTSCAN) { + int err; + + mutex_lock(&lo->lo_disk->open_mutex); err = bdev_disk_changed(lo->lo_disk, false); - if (!release) - mutex_unlock(&lo->lo_disk->open_mutex); + mutex_unlock(&lo->lo_disk->open_mutex); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", - __func__, lo_number, err); + __func__, lo->lo_number, err); /* Device is gone, no point in returning error */ - err = 0; } - /* - * lo->lo_state is set to Lo_unbound here after above partscan has - * finished. - * - * There cannot be anybody else entering __loop_clr_fd() as - * lo->lo_backing_file is already cleared and Lo_rundown state - * protects us from all the other places trying to change the 'lo' - * device. - */ - mutex_lock(&lo->lo_mutex); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART; + + fput(filp); +} + +static void loop_rundown_completed(struct loop_device *lo) +{ + mutex_lock(&lo->lo_mutex); lo->lo_state = Lo_unbound; mutex_unlock(&lo->lo_mutex); + module_put(THIS_MODULE); +} - /* - * Need not hold lo_mutex to fput backing file. Calling fput holding - * lo_mutex triggers a circular lock dependency possibility warning as - * fput can take open_mutex which is usually taken before lo_mutex. - */ - if (filp) - fput(filp); - return err; +static void loop_rundown_workfn(struct work_struct *work) +{ + struct loop_device *lo = container_of(work, struct loop_device, + rundown_work); + struct block_device *bdev = lo->lo_device; + struct gendisk *disk = lo->lo_disk; + + __loop_clr_fd(lo); + kobject_put(&bdev->bd_device.kobj); + module_put(disk->fops->owner); + loop_rundown_completed(lo); +} + +static void loop_schedule_rundown(struct loop_device *lo) +{ + struct block_device *bdev = lo->lo_device; + struct gendisk *disk = lo->lo_disk; + + __module_get(disk->fops->owner); + kobject_get(&bdev->bd_device.kobj); + INIT_WORK(&lo->rundown_work, loop_rundown_workfn); + queue_work(system_long_wq, &lo->rundown_work); } static int loop_clr_fd(struct loop_device *lo) @@ -1234,7 +1228,9 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_rundown; mutex_unlock(&lo->lo_mutex); - return __loop_clr_fd(lo, false); + __loop_clr_fd(lo); + loop_rundown_completed(lo); + return 0; } static int @@ -1758,7 +1754,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - __loop_clr_fd(lo, true); + loop_schedule_rundown(lo); return; } else if (lo->lo_state == Lo_bound) { /* diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 082d4b6bfc6a..918a7a2dc025 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -56,6 +56,7 @@ struct loop_device { struct gendisk *lo_disk; struct mutex lo_mutex; bool idr_visible; + struct work_struct rundown_work; }; struct loop_cmd { diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 30f471021a40..e6005c232328 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -136,16 +136,15 @@ struct mtip_compat_ide_task_request_s { * return value * true if device removed, else false */ -static bool mtip_check_surprise_removal(struct pci_dev *pdev) +static bool mtip_check_surprise_removal(struct driver_data *dd) { u16 vendor_id = 0; - struct driver_data *dd = pci_get_drvdata(pdev); if (dd->sr) return true; /* Read the vendorID from the configuration space */ - pci_read_config_word(pdev, 0x00, &vendor_id); + pci_read_config_word(dd->pdev, 0x00, &vendor_id); if (vendor_id == 0xFFFF) { dd->sr = true; if (dd->queue) @@ -447,7 +446,7 @@ static int mtip_device_reset(struct driver_data *dd) { int rv = 0; - if (mtip_check_surprise_removal(dd->pdev)) + if (mtip_check_surprise_removal(dd)) return 0; if (mtip_hba_reset(dd) < 0) @@ -727,7 +726,7 @@ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) dev_warn(&dd->pdev->dev, "Port stat errors %x unhandled\n", (port_stat & ~PORT_IRQ_HANDLED)); - if (mtip_check_surprise_removal(dd->pdev)) + if (mtip_check_surprise_removal(dd)) return; } if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) { @@ -752,7 +751,7 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) /* Acknowledge the interrupt status on the port.*/ port_stat = readl(port->mmio + PORT_IRQ_STAT); if (unlikely(port_stat == 0xFFFFFFFF)) { - mtip_check_surprise_removal(dd->pdev); + mtip_check_surprise_removal(dd); return IRQ_HANDLED; } writel(port_stat, port->mmio + PORT_IRQ_STAT); @@ -796,7 +795,7 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } if (unlikely(port_stat & PORT_IRQ_ERR)) { - if (unlikely(mtip_check_surprise_removal(dd->pdev))) { + if (unlikely(mtip_check_surprise_removal(dd))) { /* don't proceed further */ return IRQ_HANDLED; } @@ -915,7 +914,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) msleep(100); - if (mtip_check_surprise_removal(port->dd->pdev)) + if (mtip_check_surprise_removal(port->dd)) goto err_fault; active = mtip_commands_active(port); @@ -980,7 +979,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } - if (mtip_check_surprise_removal(dd->pdev)) + if (mtip_check_surprise_removal(dd)) return -EFAULT; rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); @@ -1022,7 +1021,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, fis->command, int_cmd->status); rv = -EIO; - if (mtip_check_surprise_removal(dd->pdev) || + if (mtip_check_surprise_removal(dd) || test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { dev_err(&dd->pdev->dev, @@ -2513,7 +2512,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) return -EFAULT; - if (mtip_check_surprise_removal(dd->pdev)) + if (mtip_check_surprise_removal(dd)) return -EFAULT; if (mtip_get_identify(dd->port, NULL) < 0) @@ -2891,7 +2890,7 @@ static int mtip_hw_init(struct driver_data *dd) time_before(jiffies, timeout)) { mdelay(100); } - if (unlikely(mtip_check_surprise_removal(dd->pdev))) { + if (unlikely(mtip_check_surprise_removal(dd))) { timetaken = jiffies - timetaken; dev_warn(&dd->pdev->dev, "Surprise removal detected at %u ms\n", @@ -4098,7 +4097,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) list_add(&dd->remove_list, &removing_list); spin_unlock_irqrestore(&dev_lock, flags); - mtip_check_surprise_removal(pdev); + mtip_check_surprise_removal(dd); synchronize_irq(dd->pdev->irq); /* Spin until workers are done */ @@ -4145,36 +4144,17 @@ static void mtip_pci_remove(struct pci_dev *pdev) * 0 Success * <0 Error */ -static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) +static int __maybe_unused mtip_pci_suspend(struct device *dev) { int rv = 0; - struct driver_data *dd = pci_get_drvdata(pdev); - - if (!dd) { - dev_err(&pdev->dev, - "Driver private datastructure is NULL\n"); - return -EFAULT; - } + struct driver_data *dd = dev_get_drvdata(dev); set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag); /* Disable ports & interrupts then send standby immediate */ rv = mtip_block_suspend(dd); - if (rv < 0) { - dev_err(&pdev->dev, - "Failed to suspend controller\n"); - return rv; - } - - /* - * Save the pci config space to pdev structure & - * disable the device - */ - pci_save_state(pdev); - pci_disable_device(pdev); - - /* Move to Low power state*/ - pci_set_power_state(pdev, PCI_D3hot); + if (rv < 0) + dev_err(dev, "Failed to suspend controller\n"); return rv; } @@ -4186,32 +4166,10 @@ static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) * 0 Success * <0 Error */ -static int mtip_pci_resume(struct pci_dev *pdev) +static int __maybe_unused mtip_pci_resume(struct device *dev) { int rv = 0; - struct driver_data *dd; - - dd = pci_get_drvdata(pdev); - if (!dd) { - dev_err(&pdev->dev, - "Driver private datastructure is NULL\n"); - return -EFAULT; - } - - /* Move the device to active State */ - pci_set_power_state(pdev, PCI_D0); - - /* Restore PCI configuration space */ - pci_restore_state(pdev); - - /* Enable the PCI device*/ - rv = pcim_enable_device(pdev); - if (rv < 0) { - dev_err(&pdev->dev, - "Failed to enable card during resume\n"); - goto err; - } - pci_set_master(pdev); + struct driver_data *dd = dev_get_drvdata(dev); /* * Calls hbaReset, initPort, & startPort function @@ -4219,9 +4177,8 @@ static int mtip_pci_resume(struct pci_dev *pdev) */ rv = mtip_block_resume(dd); if (rv < 0) - dev_err(&pdev->dev, "Unable to resume\n"); + dev_err(dev, "Unable to resume\n"); -err: clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag); return rv; @@ -4252,14 +4209,15 @@ static const struct pci_device_id mtip_pci_tbl[] = { { 0 } }; +static SIMPLE_DEV_PM_OPS(mtip_pci_pm_ops, mtip_pci_suspend, mtip_pci_resume); + /* Structure that describes the PCI driver functions. */ static struct pci_driver mtip_pci_driver = { .name = MTIP_DRV_NAME, .id_table = mtip_pci_tbl, .probe = mtip_pci_probe, .remove = mtip_pci_remove, - .suspend = mtip_pci_suspend, - .resume = mtip_pci_resume, + .driver.pm = &mtip_pci_pm_ops, .shutdown = mtip_pci_shutdown, }; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 54f7d490f8eb..13004beb48ca 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -340,9 +340,9 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev, return 0; /* - * Make sure at least one queue exists for each of submit and poll. + * Make sure at least one submit queue exists. */ - if (!submit_queues || !poll_queues) + if (!submit_queues) return -EINVAL; /* @@ -1574,7 +1574,9 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) cmd = blk_mq_rq_to_pdu(req); cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req), blk_rq_sectors(req)); - end_cmd(cmd); + if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, + blk_mq_end_request_batch)) + end_cmd(cmd); nr++; } @@ -1890,7 +1892,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) if (g_shared_tag_bitmap) set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; set->driver_data = nullb; - if (g_poll_queues) + if (poll_queues) set->nr_maps = 3; else set->nr_maps = 1; @@ -1917,8 +1919,6 @@ static int null_validate_conf(struct nullb_device *dev) if (dev->poll_queues > g_poll_queues) dev->poll_queues = g_poll_queues; - else if (dev->poll_queues == 0) - dev->poll_queues = 1; dev->prev_poll_queues = dev->poll_queues; dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 887c98d61684..2b6b70a39e76 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -113,57 +113,10 @@ static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd) return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1); } -/* - * create and register a pktcdvd kernel object. - */ -static struct pktcdvd_kobj* pkt_kobj_create(struct pktcdvd_device *pd, - const char* name, - struct kobject* parent, - struct kobj_type* ktype) -{ - struct pktcdvd_kobj *p; - int error; - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return NULL; - p->pd = pd; - error = kobject_init_and_add(&p->kobj, ktype, parent, "%s", name); - if (error) { - kobject_put(&p->kobj); - return NULL; - } - kobject_uevent(&p->kobj, KOBJ_ADD); - return p; -} -/* - * remove a pktcdvd kernel object. - */ -static void pkt_kobj_remove(struct pktcdvd_kobj *p) -{ - if (p) - kobject_put(&p->kobj); -} -/* - * default release function for pktcdvd kernel objects. - */ -static void pkt_kobj_release(struct kobject *kobj) -{ - kfree(to_pktcdvdkobj(kobj)); -} - - /********************************************************** - * * sysfs interface for pktcdvd * by (C) 2006 Thomas Maier <balagi@justmail.de> - * - **********************************************************/ - -#define DEF_ATTR(_obj,_name,_mode) \ - static struct attribute _obj = { .name = _name, .mode = _mode } - -/********************************************************** + /sys/class/pktcdvd/pktcdvd[0-7]/ stat/reset stat/packets_started @@ -176,75 +129,94 @@ static void pkt_kobj_release(struct kobject *kobj) write_queue/congestion_on **********************************************************/ -DEF_ATTR(kobj_pkt_attr_st1, "reset", 0200); -DEF_ATTR(kobj_pkt_attr_st2, "packets_started", 0444); -DEF_ATTR(kobj_pkt_attr_st3, "packets_finished", 0444); -DEF_ATTR(kobj_pkt_attr_st4, "kb_written", 0444); -DEF_ATTR(kobj_pkt_attr_st5, "kb_read", 0444); -DEF_ATTR(kobj_pkt_attr_st6, "kb_read_gather", 0444); - -static struct attribute *kobj_pkt_attrs_stat[] = { - &kobj_pkt_attr_st1, - &kobj_pkt_attr_st2, - &kobj_pkt_attr_st3, - &kobj_pkt_attr_st4, - &kobj_pkt_attr_st5, - &kobj_pkt_attr_st6, - NULL -}; +static ssize_t packets_started_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); -DEF_ATTR(kobj_pkt_attr_wq1, "size", 0444); -DEF_ATTR(kobj_pkt_attr_wq2, "congestion_off", 0644); -DEF_ATTR(kobj_pkt_attr_wq3, "congestion_on", 0644); + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started); +} +static DEVICE_ATTR_RO(packets_started); -static struct attribute *kobj_pkt_attrs_wqueue[] = { - &kobj_pkt_attr_wq1, - &kobj_pkt_attr_wq2, - &kobj_pkt_attr_wq3, - NULL -}; +static ssize_t packets_finished_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); -static ssize_t kobj_pkt_show(struct kobject *kobj, - struct attribute *attr, char *data) + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended); +} +static DEVICE_ATTR_RO(packets_finished); + +static ssize_t kb_written_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd; - int n = 0; - int v; - if (strcmp(attr->name, "packets_started") == 0) { - n = sprintf(data, "%lu\n", pd->stats.pkt_started); + struct pktcdvd_device *pd = dev_get_drvdata(dev); - } else if (strcmp(attr->name, "packets_finished") == 0) { - n = sprintf(data, "%lu\n", pd->stats.pkt_ended); + return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1); +} +static DEVICE_ATTR_RO(kb_written); - } else if (strcmp(attr->name, "kb_written") == 0) { - n = sprintf(data, "%lu\n", pd->stats.secs_w >> 1); +static ssize_t kb_read_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); - } else if (strcmp(attr->name, "kb_read") == 0) { - n = sprintf(data, "%lu\n", pd->stats.secs_r >> 1); + return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1); +} +static DEVICE_ATTR_RO(kb_read); - } else if (strcmp(attr->name, "kb_read_gather") == 0) { - n = sprintf(data, "%lu\n", pd->stats.secs_rg >> 1); +static ssize_t kb_read_gather_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); - } else if (strcmp(attr->name, "size") == 0) { - spin_lock(&pd->lock); - v = pd->bio_queue_size; - spin_unlock(&pd->lock); - n = sprintf(data, "%d\n", v); + return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1); +} +static DEVICE_ATTR_RO(kb_read_gather); - } else if (strcmp(attr->name, "congestion_off") == 0) { - spin_lock(&pd->lock); - v = pd->write_congestion_off; - spin_unlock(&pd->lock); - n = sprintf(data, "%d\n", v); +static ssize_t reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); - } else if (strcmp(attr->name, "congestion_on") == 0) { - spin_lock(&pd->lock); - v = pd->write_congestion_on; - spin_unlock(&pd->lock); - n = sprintf(data, "%d\n", v); + if (len > 0) { + pd->stats.pkt_started = 0; + pd->stats.pkt_ended = 0; + pd->stats.secs_w = 0; + pd->stats.secs_rg = 0; + pd->stats.secs_r = 0; } + return len; +} +static DEVICE_ATTR_WO(reset); + +static struct attribute *pkt_stat_attrs[] = { + &dev_attr_packets_finished.attr, + &dev_attr_packets_started.attr, + &dev_attr_kb_read.attr, + &dev_attr_kb_written.attr, + &dev_attr_kb_read_gather.attr, + &dev_attr_reset.attr, + NULL, +}; + +static const struct attribute_group pkt_stat_group = { + .name = "stat", + .attrs = pkt_stat_attrs, +}; + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->bio_queue_size); + spin_unlock(&pd->lock); return n; } +static DEVICE_ATTR_RO(size); static void init_write_congestion_marks(int* lo, int* hi) { @@ -263,30 +235,56 @@ static void init_write_congestion_marks(int* lo, int* hi) } } -static ssize_t kobj_pkt_store(struct kobject *kobj, - struct attribute *attr, - const char *data, size_t len) +static ssize_t congestion_off_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd; - int val; + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; - if (strcmp(attr->name, "reset") == 0 && len > 0) { - pd->stats.pkt_started = 0; - pd->stats.pkt_ended = 0; - pd->stats.secs_w = 0; - pd->stats.secs_rg = 0; - pd->stats.secs_r = 0; + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_off); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; - } else if (strcmp(attr->name, "congestion_off") == 0 - && sscanf(data, "%d", &val) == 1) { + if (sscanf(buf, "%d", &val) == 1) { spin_lock(&pd->lock); pd->write_congestion_off = val; init_write_congestion_marks(&pd->write_congestion_off, &pd->write_congestion_on); spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_off); + +static ssize_t congestion_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_on); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; - } else if (strcmp(attr->name, "congestion_on") == 0 - && sscanf(data, "%d", &val) == 1) { + if (sscanf(buf, "%d", &val) == 1) { spin_lock(&pd->lock); pd->write_congestion_on = val; init_write_congestion_marks(&pd->write_congestion_off, @@ -295,44 +293,39 @@ static ssize_t kobj_pkt_store(struct kobject *kobj, } return len; } +static DEVICE_ATTR_RW(congestion_on); -static const struct sysfs_ops kobj_pkt_ops = { - .show = kobj_pkt_show, - .store = kobj_pkt_store +static struct attribute *pkt_wq_attrs[] = { + &dev_attr_congestion_on.attr, + &dev_attr_congestion_off.attr, + &dev_attr_size.attr, + NULL, }; -static struct kobj_type kobj_pkt_type_stat = { - .release = pkt_kobj_release, - .sysfs_ops = &kobj_pkt_ops, - .default_attrs = kobj_pkt_attrs_stat + +static const struct attribute_group pkt_wq_group = { + .name = "write_queue", + .attrs = pkt_wq_attrs, }; -static struct kobj_type kobj_pkt_type_wqueue = { - .release = pkt_kobj_release, - .sysfs_ops = &kobj_pkt_ops, - .default_attrs = kobj_pkt_attrs_wqueue + +static const struct attribute_group *pkt_groups[] = { + &pkt_stat_group, + &pkt_wq_group, + NULL, }; static void pkt_sysfs_dev_new(struct pktcdvd_device *pd) { if (class_pktcdvd) { - pd->dev = device_create(class_pktcdvd, NULL, MKDEV(0, 0), NULL, - "%s", pd->name); + pd->dev = device_create_with_groups(class_pktcdvd, NULL, + MKDEV(0, 0), pd, pkt_groups, + "%s", pd->name); if (IS_ERR(pd->dev)) pd->dev = NULL; } - if (pd->dev) { - pd->kobj_stat = pkt_kobj_create(pd, "stat", - &pd->dev->kobj, - &kobj_pkt_type_stat); - pd->kobj_wqueue = pkt_kobj_create(pd, "write_queue", - &pd->dev->kobj, - &kobj_pkt_type_wqueue); - } } static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd) { - pkt_kobj_remove(pd->kobj_stat); - pkt_kobj_remove(pd->kobj_wqueue); if (class_pktcdvd) device_unregister(pd->dev); } @@ -1107,7 +1100,6 @@ static int pkt_handle_queue(struct pktcdvd_device *pd) sector_t zone = 0; /* Suppress gcc warning */ struct pkt_rb_node *node, *first_node; struct rb_node *n; - int wakeup; atomic_set(&pd->scan_queue, 0); @@ -1179,12 +1171,14 @@ try_next_bio: spin_unlock(&pkt->lock); } /* check write congestion marks, and if bio_queue_size is - below, wake up any waiters */ - wakeup = (pd->write_congestion_on > 0 - && pd->bio_queue_size <= pd->write_congestion_off); + * below, wake up any waiters + */ + if (pd->congested && + pd->bio_queue_size <= pd->write_congestion_off) { + pd->congested = false; + wake_up_var(&pd->congested); + } spin_unlock(&pd->lock); - if (wakeup) - clear_bdi_congested(pd->disk->bdi, BLK_RW_ASYNC); pkt->sleep_time = max(PACKET_WAIT_TIME, 1); pkt_set_state(pkt, PACKET_WAITING_STATE); @@ -2356,7 +2350,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } spin_unlock(&pd->cdrw.active_list_lock); - /* + /* * Test if there is enough room left in the bio work queue * (queue size >= congestion on mark). * If not, wait till the work queue size is below the congestion off mark. @@ -2364,12 +2358,20 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(bio->bi_bdev->bd_disk->bdi, BLK_RW_ASYNC); - do { + struct wait_bit_queue_entry wqe; + + init_wait_var_entry(&wqe, &pd->congested, 0); + for (;;) { + prepare_to_wait_event(__var_waitqueue(&pd->congested), + &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); + if (pd->bio_queue_size <= pd->write_congestion_off) + break; + pd->congested = true; spin_unlock(&pd->lock); - congestion_wait(BLK_RW_ASYNC, HZ); + schedule(); spin_lock(&pd->lock); - } while(pd->bio_queue_size > pd->write_congestion_off); + } } spin_unlock(&pd->lock); diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 44e45af00e83..2be5d87a3ca6 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -452,6 +452,7 @@ static struct attribute *rnbd_dev_attrs[] = { &rnbd_clt_nr_poll_queues.attr, NULL, }; +ATTRIBUTE_GROUPS(rnbd_dev); void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) { @@ -474,7 +475,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) static struct kobj_type rnbd_dev_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = rnbd_dev_attrs, + .default_groups = rnbd_dev_groups, }; static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile deleted file mode 100644 index 7ef158099d33..000000000000 --- a/drivers/block/rsxx/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o -rsxx-objs := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c deleted file mode 100644 index 11ed1d9646b9..000000000000 --- a/drivers/block/rsxx/config.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -* Filename: config.c -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#include <linux/types.h> -#include <linux/crc32.h> -#include <linux/swab.h> - -#include "rsxx_priv.h" -#include "rsxx_cfg.h" - -static void initialize_config(struct rsxx_card_cfg *cfg) -{ - cfg->hdr.version = RSXX_CFG_VERSION; - - cfg->data.block_size = RSXX_HW_BLK_SIZE; - cfg->data.stripe_size = RSXX_HW_BLK_SIZE; - cfg->data.vendor_id = RSXX_VENDOR_ID_IBM; - cfg->data.cache_order = (-1); - cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; - cfg->data.intr_coal.count = 0; - cfg->data.intr_coal.latency = 0; -} - -static u32 config_data_crc32(struct rsxx_card_cfg *cfg) -{ - /* - * Return the compliment of the CRC to ensure compatibility - * (i.e. this is how early rsxx drivers did it.) - */ - - return ~crc32(~0, &cfg->data, sizeof(cfg->data)); -} - - -/*----------------- Config Byte Swap Functions -------------------*/ -static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) -{ - hdr->version = be32_to_cpu((__force __be32) hdr->version); - hdr->crc = be32_to_cpu((__force __be32) hdr->crc); -} - -static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) -{ - hdr->version = (__force u32) cpu_to_be32(hdr->version); - hdr->crc = (__force u32) cpu_to_be32(hdr->crc); -} - -static void config_data_swab(struct rsxx_card_cfg *cfg) -{ - u32 *data = (u32 *) &cfg->data; - int i; - - for (i = 0; i < (sizeof(cfg->data) / 4); i++) - data[i] = swab32(data[i]); -} - -static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) -{ - u32 *data = (u32 *) &cfg->data; - int i; - - for (i = 0; i < (sizeof(cfg->data) / 4); i++) - data[i] = le32_to_cpu((__force __le32) data[i]); -} - -static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) -{ - u32 *data = (u32 *) &cfg->data; - int i; - - for (i = 0; i < (sizeof(cfg->data) / 4); i++) - data[i] = (__force u32) cpu_to_le32(data[i]); -} - - -/*----------------- Config Operations ------------------*/ -static int rsxx_save_config(struct rsxx_cardinfo *card) -{ - struct rsxx_card_cfg cfg; - int st; - - memcpy(&cfg, &card->config, sizeof(cfg)); - - if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { - dev_err(CARD_TO_DEV(card), - "Cannot save config with invalid version %d\n", - cfg.hdr.version); - return -EINVAL; - } - - /* Convert data to little endian for the CRC calculation. */ - config_data_cpu_to_le(&cfg); - - cfg.hdr.crc = config_data_crc32(&cfg); - - /* - * Swap the data from little endian to big endian so it can be - * stored. - */ - config_data_swab(&cfg); - config_hdr_cpu_to_be(&cfg.hdr); - - st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); - if (st) - return st; - - return 0; -} - -int rsxx_load_config(struct rsxx_cardinfo *card) -{ - int st; - u32 crc; - - st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), - &card->config, 1); - if (st) { - dev_err(CARD_TO_DEV(card), - "Failed reading card config.\n"); - return st; - } - - config_hdr_be_to_cpu(&card->config.hdr); - - if (card->config.hdr.version == RSXX_CFG_VERSION) { - /* - * We calculate the CRC with the data in little endian, because - * early drivers did not take big endian CPUs into account. - * The data is always stored in big endian, so we need to byte - * swap it before calculating the CRC. - */ - - config_data_swab(&card->config); - - /* Check the CRC */ - crc = config_data_crc32(&card->config); - if (crc != card->config.hdr.crc) { - dev_err(CARD_TO_DEV(card), - "Config corruption detected!\n"); - dev_info(CARD_TO_DEV(card), - "CRC (sb x%08x is x%08x)\n", - card->config.hdr.crc, crc); - return -EIO; - } - - /* Convert the data to CPU byteorder */ - config_data_le_to_cpu(&card->config); - - } else if (card->config.hdr.version != 0) { - dev_err(CARD_TO_DEV(card), - "Invalid config version %d.\n", - card->config.hdr.version); - /* - * Config version changes require special handling from the - * user - */ - return -EINVAL; - } else { - dev_info(CARD_TO_DEV(card), - "Initializing card configuration.\n"); - initialize_config(&card->config); - st = rsxx_save_config(card); - if (st) - return st; - } - - card->config_valid = 1; - - dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", - card->config.hdr.version); - dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", - card->config.hdr.crc); - dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", - card->config.data.block_size); - dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", - card->config.data.stripe_size); - dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", - card->config.data.vendor_id); - dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", - card->config.data.cache_order); - dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", - card->config.data.intr_coal.mode); - dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", - card->config.data.intr_coal.count); - dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", - card->config.data.intr_coal.latency); - - return 0; -} - diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c deleted file mode 100644 index 8d9d69f5dfbc..000000000000 --- a/drivers/block/rsxx/core.c +++ /dev/null @@ -1,1126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -* Filename: core.c -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/reboot.h> -#include <linux/slab.h> -#include <linux/bitops.h> -#include <linux/delay.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> - -#include <linux/genhd.h> -#include <linux/idr.h> - -#include "rsxx_priv.h" -#include "rsxx_cfg.h" - -#define NO_LEGACY 0 -#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */ - -MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver"); -MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRIVER_VERSION); - -static unsigned int force_legacy = NO_LEGACY; -module_param(force_legacy, uint, 0444); -MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); - -static unsigned int sync_start = 1; -module_param(sync_start, uint, 0444); -MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete " - "until the card startup has completed."); - -static DEFINE_IDA(rsxx_disk_ida); - -/* --------------------Debugfs Setup ------------------- */ - -static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p) -{ - struct rsxx_cardinfo *card = m->private; - - seq_printf(m, "HWID 0x%08x\n", - ioread32(card->regmap + HWID)); - seq_printf(m, "SCRATCH 0x%08x\n", - ioread32(card->regmap + SCRATCH)); - seq_printf(m, "IER 0x%08x\n", - ioread32(card->regmap + IER)); - seq_printf(m, "IPR 0x%08x\n", - ioread32(card->regmap + IPR)); - seq_printf(m, "CREG_CMD 0x%08x\n", - ioread32(card->regmap + CREG_CMD)); - seq_printf(m, "CREG_ADD 0x%08x\n", - ioread32(card->regmap + CREG_ADD)); - seq_printf(m, "CREG_CNT 0x%08x\n", - ioread32(card->regmap + CREG_CNT)); - seq_printf(m, "CREG_STAT 0x%08x\n", - ioread32(card->regmap + CREG_STAT)); - seq_printf(m, "CREG_DATA0 0x%08x\n", - ioread32(card->regmap + CREG_DATA0)); - seq_printf(m, "CREG_DATA1 0x%08x\n", - ioread32(card->regmap + CREG_DATA1)); - seq_printf(m, "CREG_DATA2 0x%08x\n", - ioread32(card->regmap + CREG_DATA2)); - seq_printf(m, "CREG_DATA3 0x%08x\n", - ioread32(card->regmap + CREG_DATA3)); - seq_printf(m, "CREG_DATA4 0x%08x\n", - ioread32(card->regmap + CREG_DATA4)); - seq_printf(m, "CREG_DATA5 0x%08x\n", - ioread32(card->regmap + CREG_DATA5)); - seq_printf(m, "CREG_DATA6 0x%08x\n", - ioread32(card->regmap + CREG_DATA6)); - seq_printf(m, "CREG_DATA7 0x%08x\n", - ioread32(card->regmap + CREG_DATA7)); - seq_printf(m, "INTR_COAL 0x%08x\n", - ioread32(card->regmap + INTR_COAL)); - seq_printf(m, "HW_ERROR 0x%08x\n", - ioread32(card->regmap + HW_ERROR)); - seq_printf(m, "DEBUG0 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG0)); - seq_printf(m, "DEBUG1 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG1)); - seq_printf(m, "DEBUG2 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG2)); - seq_printf(m, "DEBUG3 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG3)); - seq_printf(m, "DEBUG4 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG4)); - seq_printf(m, "DEBUG5 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG5)); - seq_printf(m, "DEBUG6 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG6)); - seq_printf(m, "DEBUG7 0x%08x\n", - ioread32(card->regmap + PCI_DEBUG7)); - seq_printf(m, "RECONFIG 0x%08x\n", - ioread32(card->regmap + PCI_RECONFIG)); - - return 0; -} - -static int rsxx_attr_stats_show(struct seq_file *m, void *p) -{ - struct rsxx_cardinfo *card = m->private; - int i; - - for (i = 0; i < card->n_targets; i++) { - seq_printf(m, "Ctrl %d CRC Errors = %d\n", - i, card->ctrl[i].stats.crc_errors); - seq_printf(m, "Ctrl %d Hard Errors = %d\n", - i, card->ctrl[i].stats.hard_errors); - seq_printf(m, "Ctrl %d Soft Errors = %d\n", - i, card->ctrl[i].stats.soft_errors); - seq_printf(m, "Ctrl %d Writes Issued = %d\n", - i, card->ctrl[i].stats.writes_issued); - seq_printf(m, "Ctrl %d Writes Failed = %d\n", - i, card->ctrl[i].stats.writes_failed); - seq_printf(m, "Ctrl %d Reads Issued = %d\n", - i, card->ctrl[i].stats.reads_issued); - seq_printf(m, "Ctrl %d Reads Failed = %d\n", - i, card->ctrl[i].stats.reads_failed); - seq_printf(m, "Ctrl %d Reads Retried = %d\n", - i, card->ctrl[i].stats.reads_retried); - seq_printf(m, "Ctrl %d Discards Issued = %d\n", - i, card->ctrl[i].stats.discards_issued); - seq_printf(m, "Ctrl %d Discards Failed = %d\n", - i, card->ctrl[i].stats.discards_failed); - seq_printf(m, "Ctrl %d DMA SW Errors = %d\n", - i, card->ctrl[i].stats.dma_sw_err); - seq_printf(m, "Ctrl %d DMA HW Faults = %d\n", - i, card->ctrl[i].stats.dma_hw_fault); - seq_printf(m, "Ctrl %d DMAs Cancelled = %d\n", - i, card->ctrl[i].stats.dma_cancelled); - seq_printf(m, "Ctrl %d SW Queue Depth = %d\n", - i, card->ctrl[i].stats.sw_q_depth); - seq_printf(m, "Ctrl %d HW Queue Depth = %d\n", - i, atomic_read(&card->ctrl[i].stats.hw_q_depth)); - } - - return 0; -} - -static int rsxx_attr_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, rsxx_attr_stats_show, inode->i_private); -} - -static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, rsxx_attr_pci_regs_show, inode->i_private); -} - -static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - struct rsxx_cardinfo *card = file_inode(fp)->i_private; - char *buf; - int st; - - buf = kzalloc(cnt, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); - if (!st) { - if (copy_to_user(ubuf, buf, cnt)) - st = -EFAULT; - } - kfree(buf); - if (st) - return st; - *ppos += cnt; - return cnt; -} - -static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - struct rsxx_cardinfo *card = file_inode(fp)->i_private; - char *buf; - ssize_t st; - - buf = memdup_user(ubuf, cnt); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); - kfree(buf); - if (st) - return st; - *ppos += cnt; - return cnt; -} - -static const struct file_operations debugfs_cram_fops = { - .owner = THIS_MODULE, - .read = rsxx_cram_read, - .write = rsxx_cram_write, -}; - -static const struct file_operations debugfs_stats_fops = { - .owner = THIS_MODULE, - .open = rsxx_attr_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations debugfs_pci_regs_fops = { - .owner = THIS_MODULE, - .open = rsxx_attr_pci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card) -{ - struct dentry *debugfs_stats; - struct dentry *debugfs_pci_regs; - struct dentry *debugfs_cram; - - card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL); - if (IS_ERR_OR_NULL(card->debugfs_dir)) - goto failed_debugfs_dir; - - debugfs_stats = debugfs_create_file("stats", 0444, - card->debugfs_dir, card, - &debugfs_stats_fops); - if (IS_ERR_OR_NULL(debugfs_stats)) - goto failed_debugfs_stats; - - debugfs_pci_regs = debugfs_create_file("pci_regs", 0444, - card->debugfs_dir, card, - &debugfs_pci_regs_fops); - if (IS_ERR_OR_NULL(debugfs_pci_regs)) - goto failed_debugfs_pci_regs; - - debugfs_cram = debugfs_create_file("cram", 0644, - card->debugfs_dir, card, - &debugfs_cram_fops); - if (IS_ERR_OR_NULL(debugfs_cram)) - goto failed_debugfs_cram; - - return; -failed_debugfs_cram: - debugfs_remove(debugfs_pci_regs); -failed_debugfs_pci_regs: - debugfs_remove(debugfs_stats); -failed_debugfs_stats: - debugfs_remove(card->debugfs_dir); -failed_debugfs_dir: - card->debugfs_dir = NULL; -} - -/*----------------- Interrupt Control & Handling -------------------*/ - -static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) -{ - card->isr_mask = 0; - card->ier_mask = 0; -} - -static void __enable_intr(unsigned int *mask, unsigned int intr) -{ - *mask |= intr; -} - -static void __disable_intr(unsigned int *mask, unsigned int intr) -{ - *mask &= ~intr; -} - -/* - * NOTE: Disabling the IER will disable the hardware interrupt. - * Disabling the ISR will disable the software handling of the ISR bit. - * - * Enable/Disable interrupt functions assume the card->irq_lock - * is held by the caller. - */ -void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) -{ - if (unlikely(card->halt) || - unlikely(card->eeh_state)) - return; - - __enable_intr(&card->ier_mask, intr); - iowrite32(card->ier_mask, card->regmap + IER); -} - -void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) -{ - if (unlikely(card->eeh_state)) - return; - - __disable_intr(&card->ier_mask, intr); - iowrite32(card->ier_mask, card->regmap + IER); -} - -void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, - unsigned int intr) -{ - if (unlikely(card->halt) || - unlikely(card->eeh_state)) - return; - - __enable_intr(&card->isr_mask, intr); - __enable_intr(&card->ier_mask, intr); - iowrite32(card->ier_mask, card->regmap + IER); -} -void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, - unsigned int intr) -{ - if (unlikely(card->eeh_state)) - return; - - __disable_intr(&card->isr_mask, intr); - __disable_intr(&card->ier_mask, intr); - iowrite32(card->ier_mask, card->regmap + IER); -} - -static irqreturn_t rsxx_isr(int irq, void *pdata) -{ - struct rsxx_cardinfo *card = pdata; - unsigned int isr; - int handled = 0; - int reread_isr; - int i; - - spin_lock(&card->irq_lock); - - do { - reread_isr = 0; - - if (unlikely(card->eeh_state)) - break; - - isr = ioread32(card->regmap + ISR); - if (isr == 0xffffffff) { - /* - * A few systems seem to have an intermittent issue - * where PCI reads return all Fs, but retrying the read - * a little later will return as expected. - */ - dev_info(CARD_TO_DEV(card), - "ISR = 0xFFFFFFFF, retrying later\n"); - break; - } - - isr &= card->isr_mask; - if (!isr) - break; - - for (i = 0; i < card->n_targets; i++) { - if (isr & CR_INTR_DMA(i)) { - if (card->ier_mask & CR_INTR_DMA(i)) { - rsxx_disable_ier(card, CR_INTR_DMA(i)); - reread_isr = 1; - } - queue_work(card->ctrl[i].done_wq, - &card->ctrl[i].dma_done_work); - handled++; - } - } - - if (isr & CR_INTR_CREG) { - queue_work(card->creg_ctrl.creg_wq, - &card->creg_ctrl.done_work); - handled++; - } - - if (isr & CR_INTR_EVENT) { - queue_work(card->event_wq, &card->event_work); - rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); - handled++; - } - } while (reread_isr); - - spin_unlock(&card->irq_lock); - - return handled ? IRQ_HANDLED : IRQ_NONE; -} - -/*----------------- Card Event Handler -------------------*/ -static const char *rsxx_card_state_to_str(unsigned int state) -{ - static const char * const state_strings[] = { - "Unknown", "Shutdown", "Starting", "Formatting", - "Uninitialized", "Good", "Shutting Down", - "Fault", "Read Only Fault", "dStroying" - }; - - return state_strings[ffs(state)]; -} - -static void card_state_change(struct rsxx_cardinfo *card, - unsigned int new_state) -{ - int st; - - dev_info(CARD_TO_DEV(card), - "card state change detected.(%s -> %s)\n", - rsxx_card_state_to_str(card->state), - rsxx_card_state_to_str(new_state)); - - card->state = new_state; - - /* Don't attach DMA interfaces if the card has an invalid config */ - if (!card->config_valid) - return; - - switch (new_state) { - case CARD_STATE_RD_ONLY_FAULT: - dev_crit(CARD_TO_DEV(card), - "Hardware has entered read-only mode!\n"); - /* - * Fall through so the DMA devices can be attached and - * the user can attempt to pull off their data. - */ - fallthrough; - case CARD_STATE_GOOD: - st = rsxx_get_card_size8(card, &card->size8); - if (st) - dev_err(CARD_TO_DEV(card), - "Failed attaching DMA devices\n"); - - if (card->config_valid) - set_capacity(card->gendisk, card->size8 >> 9); - break; - - case CARD_STATE_FAULT: - dev_crit(CARD_TO_DEV(card), - "Hardware Fault reported!\n"); - fallthrough; - - /* Everything else, detach DMA interface if it's attached. */ - case CARD_STATE_SHUTDOWN: - case CARD_STATE_STARTING: - case CARD_STATE_FORMATTING: - case CARD_STATE_UNINITIALIZED: - case CARD_STATE_SHUTTING_DOWN: - /* - * dStroy is a term coined by marketing to represent the low level - * secure erase. - */ - case CARD_STATE_DSTROYING: - set_capacity(card->gendisk, 0); - break; - } -} - -static void card_event_handler(struct work_struct *work) -{ - struct rsxx_cardinfo *card; - unsigned int state; - unsigned long flags; - int st; - - card = container_of(work, struct rsxx_cardinfo, event_work); - - if (unlikely(card->halt)) - return; - - /* - * Enable the interrupt now to avoid any weird race conditions where a - * state change might occur while rsxx_get_card_state() is - * processing a returned creg cmd. - */ - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); - spin_unlock_irqrestore(&card->irq_lock, flags); - - st = rsxx_get_card_state(card, &state); - if (st) { - dev_info(CARD_TO_DEV(card), - "Failed reading state after event.\n"); - return; - } - - if (card->state != state) - card_state_change(card, state); - - if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) - rsxx_read_hw_log(card); -} - -/*----------------- Card Operations -------------------*/ -static int card_shutdown(struct rsxx_cardinfo *card) -{ - unsigned int state; - signed long start; - const int timeout = msecs_to_jiffies(120000); - int st; - - /* We can't issue a shutdown if the card is in a transition state */ - start = jiffies; - do { - st = rsxx_get_card_state(card, &state); - if (st) - return st; - } while (state == CARD_STATE_STARTING && - (jiffies - start < timeout)); - - if (state == CARD_STATE_STARTING) - return -ETIMEDOUT; - - /* Only issue a shutdown if we need to */ - if ((state != CARD_STATE_SHUTTING_DOWN) && - (state != CARD_STATE_SHUTDOWN)) { - st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); - if (st) - return st; - } - - start = jiffies; - do { - st = rsxx_get_card_state(card, &state); - if (st) - return st; - } while (state != CARD_STATE_SHUTDOWN && - (jiffies - start < timeout)); - - if (state != CARD_STATE_SHUTDOWN) - return -ETIMEDOUT; - - return 0; -} - -static int rsxx_eeh_frozen(struct pci_dev *dev) -{ - struct rsxx_cardinfo *card = pci_get_drvdata(dev); - int i; - int st; - - dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n"); - - card->eeh_state = 1; - rsxx_mask_interrupts(card); - - /* - * We need to guarantee that the write for eeh_state and masking - * interrupts does not become reordered. This will prevent a possible - * race condition with the EEH code. - */ - wmb(); - - pci_disable_device(dev); - - st = rsxx_eeh_save_issued_dmas(card); - if (st) - return st; - - rsxx_eeh_save_issued_creg(card); - - for (i = 0; i < card->n_targets; i++) { - if (card->ctrl[i].status.buf) - dma_free_coherent(&card->dev->dev, - STATUS_BUFFER_SIZE8, - card->ctrl[i].status.buf, - card->ctrl[i].status.dma_addr); - if (card->ctrl[i].cmd.buf) - dma_free_coherent(&card->dev->dev, - COMMAND_BUFFER_SIZE8, - card->ctrl[i].cmd.buf, - card->ctrl[i].cmd.dma_addr); - } - - return 0; -} - -static void rsxx_eeh_failure(struct pci_dev *dev) -{ - struct rsxx_cardinfo *card = pci_get_drvdata(dev); - int i; - int cnt = 0; - - dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n"); - - card->eeh_state = 1; - card->halt = 1; - - for (i = 0; i < card->n_targets; i++) { - spin_lock_bh(&card->ctrl[i].queue_lock); - cnt = rsxx_cleanup_dma_queue(&card->ctrl[i], - &card->ctrl[i].queue, - COMPLETE_DMA); - spin_unlock_bh(&card->ctrl[i].queue_lock); - - cnt += rsxx_dma_cancel(&card->ctrl[i]); - - if (cnt) - dev_info(CARD_TO_DEV(card), - "Freed %d queued DMAs on channel %d\n", - cnt, card->ctrl[i].id); - } -} - -static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) -{ - unsigned int status; - int iter = 0; - - /* We need to wait for the hardware to reset */ - while (iter++ < 10) { - status = ioread32(card->regmap + PCI_RECONFIG); - - if (status & RSXX_FLUSH_BUSY) { - ssleep(1); - continue; - } - - if (status & RSXX_FLUSH_TIMEOUT) - dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n"); - return 0; - } - - /* Hardware failed resetting itself. */ - return -1; -} - -static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, - pci_channel_state_t error) -{ - int st; - - if (dev->revision < RSXX_EEH_SUPPORT) - return PCI_ERS_RESULT_NONE; - - if (error == pci_channel_io_perm_failure) { - rsxx_eeh_failure(dev); - return PCI_ERS_RESULT_DISCONNECT; - } - - st = rsxx_eeh_frozen(dev); - if (st) { - dev_err(&dev->dev, "Slot reset setup failed\n"); - rsxx_eeh_failure(dev); - return PCI_ERS_RESULT_DISCONNECT; - } - - return PCI_ERS_RESULT_NEED_RESET; -} - -static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) -{ - struct rsxx_cardinfo *card = pci_get_drvdata(dev); - unsigned long flags; - int i; - int st; - - dev_warn(&dev->dev, - "IBM Flash Adapter PCI: recovering from slot reset.\n"); - - st = pci_enable_device(dev); - if (st) - goto failed_hw_setup; - - pci_set_master(dev); - - st = rsxx_eeh_fifo_flush_poll(card); - if (st) - goto failed_hw_setup; - - rsxx_dma_queue_reset(card); - - for (i = 0; i < card->n_targets; i++) { - st = rsxx_hw_buffers_init(dev, &card->ctrl[i]); - if (st) - goto failed_hw_buffers_init; - } - - if (card->config_valid) - rsxx_dma_configure(card); - - /* Clears the ISR register from spurious interrupts */ - st = ioread32(card->regmap + ISR); - - card->eeh_state = 0; - - spin_lock_irqsave(&card->irq_lock, flags); - if (card->n_targets & RSXX_MAX_TARGETS) - rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); - else - rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C); - spin_unlock_irqrestore(&card->irq_lock, flags); - - rsxx_kick_creg_queue(card); - - for (i = 0; i < card->n_targets; i++) { - spin_lock(&card->ctrl[i].queue_lock); - if (list_empty(&card->ctrl[i].queue)) { - spin_unlock(&card->ctrl[i].queue_lock); - continue; - } - spin_unlock(&card->ctrl[i].queue_lock); - - queue_work(card->ctrl[i].issue_wq, - &card->ctrl[i].issue_dma_work); - } - - dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n"); - - return PCI_ERS_RESULT_RECOVERED; - -failed_hw_buffers_init: - for (i = 0; i < card->n_targets; i++) { - if (card->ctrl[i].status.buf) - dma_free_coherent(&card->dev->dev, - STATUS_BUFFER_SIZE8, - card->ctrl[i].status.buf, - card->ctrl[i].status.dma_addr); - if (card->ctrl[i].cmd.buf) - dma_free_coherent(&card->dev->dev, - COMMAND_BUFFER_SIZE8, - card->ctrl[i].cmd.buf, - card->ctrl[i].cmd.dma_addr); - } -failed_hw_setup: - rsxx_eeh_failure(dev); - return PCI_ERS_RESULT_DISCONNECT; - -} - -/*----------------- Driver Initialization & Setup -------------------*/ -/* Returns: 0 if the driver is compatible with the device - -1 if the driver is NOT compatible with the device */ -static int rsxx_compatibility_check(struct rsxx_cardinfo *card) -{ - unsigned char pci_rev; - - pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); - - if (pci_rev > RS70_PCI_REV_SUPPORTED) - return -1; - return 0; -} - -static int rsxx_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct rsxx_cardinfo *card; - int st; - unsigned int sync_timeout; - - dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); - - card = kzalloc(sizeof(*card), GFP_KERNEL); - if (!card) - return -ENOMEM; - - card->dev = dev; - pci_set_drvdata(dev, card); - - st = ida_alloc(&rsxx_disk_ida, GFP_KERNEL); - if (st < 0) - goto failed_ida_get; - card->disk_id = st; - - st = pci_enable_device(dev); - if (st) - goto failed_enable; - - pci_set_master(dev); - - st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); - if (st) { - dev_err(CARD_TO_DEV(card), - "No usable DMA configuration,aborting\n"); - goto failed_dma_mask; - } - - st = pci_request_regions(dev, DRIVER_NAME); - if (st) { - dev_err(CARD_TO_DEV(card), - "Failed to request memory region\n"); - goto failed_request_regions; - } - - if (pci_resource_len(dev, 0) == 0) { - dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); - st = -ENOMEM; - goto failed_iomap; - } - - card->regmap = pci_iomap(dev, 0, 0); - if (!card->regmap) { - dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); - st = -ENOMEM; - goto failed_iomap; - } - - spin_lock_init(&card->irq_lock); - card->halt = 0; - card->eeh_state = 0; - - spin_lock_irq(&card->irq_lock); - rsxx_disable_ier_and_isr(card, CR_INTR_ALL); - spin_unlock_irq(&card->irq_lock); - - if (!force_legacy) { - st = pci_enable_msi(dev); - if (st) - dev_warn(CARD_TO_DEV(card), - "Failed to enable MSI\n"); - } - - st = request_irq(dev->irq, rsxx_isr, IRQF_SHARED, - DRIVER_NAME, card); - if (st) { - dev_err(CARD_TO_DEV(card), - "Failed requesting IRQ%d\n", dev->irq); - goto failed_irq; - } - - /************* Setup Processor Command Interface *************/ - st = rsxx_creg_setup(card); - if (st) { - dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n"); - goto failed_creg_setup; - } - - spin_lock_irq(&card->irq_lock); - rsxx_enable_ier_and_isr(card, CR_INTR_CREG); - spin_unlock_irq(&card->irq_lock); - - st = rsxx_compatibility_check(card); - if (st) { - dev_warn(CARD_TO_DEV(card), - "Incompatible driver detected. Please update the driver.\n"); - st = -EINVAL; - goto failed_compatiblity_check; - } - - /************* Load Card Config *************/ - st = rsxx_load_config(card); - if (st) - dev_err(CARD_TO_DEV(card), - "Failed loading card config\n"); - - /************* Setup DMA Engine *************/ - st = rsxx_get_num_targets(card, &card->n_targets); - if (st) - dev_info(CARD_TO_DEV(card), - "Failed reading the number of DMA targets\n"); - - card->ctrl = kcalloc(card->n_targets, sizeof(*card->ctrl), - GFP_KERNEL); - if (!card->ctrl) { - st = -ENOMEM; - goto failed_dma_setup; - } - - st = rsxx_dma_setup(card); - if (st) { - dev_info(CARD_TO_DEV(card), - "Failed to setup DMA engine\n"); - goto failed_dma_setup; - } - - /************* Setup Card Event Handler *************/ - card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event"); - if (!card->event_wq) { - dev_err(CARD_TO_DEV(card), "Failed card event setup.\n"); - st = -ENOMEM; - goto failed_event_handler; - } - - INIT_WORK(&card->event_work, card_event_handler); - - st = rsxx_setup_dev(card); - if (st) - goto failed_create_dev; - - rsxx_get_card_state(card, &card->state); - - dev_info(CARD_TO_DEV(card), - "card state: %s\n", - rsxx_card_state_to_str(card->state)); - - /* - * Now that the DMA Engine and devices have been setup, - * we can enable the event interrupt(it kicks off actions in - * those layers so we couldn't enable it right away.) - */ - spin_lock_irq(&card->irq_lock); - rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); - spin_unlock_irq(&card->irq_lock); - - if (card->state == CARD_STATE_SHUTDOWN) { - st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); - if (st) - dev_crit(CARD_TO_DEV(card), - "Failed issuing card startup\n"); - if (sync_start) { - sync_timeout = SYNC_START_TIMEOUT; - - dev_info(CARD_TO_DEV(card), - "Waiting for card to startup\n"); - - do { - ssleep(1); - sync_timeout--; - - rsxx_get_card_state(card, &card->state); - } while (sync_timeout && - (card->state == CARD_STATE_STARTING)); - - if (card->state == CARD_STATE_STARTING) { - dev_warn(CARD_TO_DEV(card), - "Card startup timed out\n"); - card->size8 = 0; - } else { - dev_info(CARD_TO_DEV(card), - "card state: %s\n", - rsxx_card_state_to_str(card->state)); - st = rsxx_get_card_size8(card, &card->size8); - if (st) - card->size8 = 0; - } - } - } else if (card->state == CARD_STATE_GOOD || - card->state == CARD_STATE_RD_ONLY_FAULT) { - st = rsxx_get_card_size8(card, &card->size8); - if (st) - card->size8 = 0; - } - - st = rsxx_attach_dev(card); - if (st) - goto failed_create_dev; - - /************* Setup Debugfs *************/ - rsxx_debugfs_dev_new(card); - - return 0; - -failed_create_dev: - destroy_workqueue(card->event_wq); - card->event_wq = NULL; -failed_event_handler: - rsxx_dma_destroy(card); -failed_dma_setup: -failed_compatiblity_check: - destroy_workqueue(card->creg_ctrl.creg_wq); - card->creg_ctrl.creg_wq = NULL; -failed_creg_setup: - spin_lock_irq(&card->irq_lock); - rsxx_disable_ier_and_isr(card, CR_INTR_ALL); - spin_unlock_irq(&card->irq_lock); - free_irq(dev->irq, card); - if (!force_legacy) - pci_disable_msi(dev); -failed_irq: - pci_iounmap(dev, card->regmap); -failed_iomap: - pci_release_regions(dev); -failed_request_regions: -failed_dma_mask: - pci_disable_device(dev); -failed_enable: - ida_free(&rsxx_disk_ida, card->disk_id); -failed_ida_get: - kfree(card); - - return st; -} - -static void rsxx_pci_remove(struct pci_dev *dev) -{ - struct rsxx_cardinfo *card = pci_get_drvdata(dev); - unsigned long flags; - int st; - int i; - - if (!card) - return; - - dev_info(CARD_TO_DEV(card), - "Removing PCI-Flash SSD.\n"); - - rsxx_detach_dev(card); - - for (i = 0; i < card->n_targets; i++) { - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); - spin_unlock_irqrestore(&card->irq_lock, flags); - } - - st = card_shutdown(card); - if (st) - dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); - - /* Sync outstanding event handlers. */ - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); - spin_unlock_irqrestore(&card->irq_lock, flags); - - cancel_work_sync(&card->event_work); - - destroy_workqueue(card->event_wq); - rsxx_destroy_dev(card); - rsxx_dma_destroy(card); - destroy_workqueue(card->creg_ctrl.creg_wq); - - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_disable_ier_and_isr(card, CR_INTR_ALL); - spin_unlock_irqrestore(&card->irq_lock, flags); - - /* Prevent work_structs from re-queuing themselves. */ - card->halt = 1; - - debugfs_remove_recursive(card->debugfs_dir); - - free_irq(dev->irq, card); - - if (!force_legacy) - pci_disable_msi(dev); - - rsxx_creg_destroy(card); - - pci_iounmap(dev, card->regmap); - - pci_disable_device(dev); - pci_release_regions(dev); - - ida_free(&rsxx_disk_ida, card->disk_id); - kfree(card); -} - -static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) -{ - /* We don't support suspend at this time. */ - return -ENOSYS; -} - -static void rsxx_pci_shutdown(struct pci_dev *dev) -{ - struct rsxx_cardinfo *card = pci_get_drvdata(dev); - unsigned long flags; - int i; - - if (!card) - return; - - dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); - - rsxx_detach_dev(card); - - for (i = 0; i < card->n_targets; i++) { - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); - spin_unlock_irqrestore(&card->irq_lock, flags); - } - - card_shutdown(card); -} - -static const struct pci_error_handlers rsxx_err_handler = { - .error_detected = rsxx_error_detected, - .slot_reset = rsxx_slot_reset, -}; - -static const struct pci_device_id rsxx_pci_ids[] = { - {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, - {0,}, -}; - -MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); - -static struct pci_driver rsxx_pci_driver = { - .name = DRIVER_NAME, - .id_table = rsxx_pci_ids, - .probe = rsxx_pci_probe, - .remove = rsxx_pci_remove, - .suspend = rsxx_pci_suspend, - .shutdown = rsxx_pci_shutdown, - .err_handler = &rsxx_err_handler, -}; - -static int __init rsxx_core_init(void) -{ - int st; - - st = rsxx_dev_init(); - if (st) - return st; - - st = rsxx_dma_init(); - if (st) - goto dma_init_failed; - - st = rsxx_creg_init(); - if (st) - goto creg_init_failed; - - return pci_register_driver(&rsxx_pci_driver); - -creg_init_failed: - rsxx_dma_cleanup(); -dma_init_failed: - rsxx_dev_cleanup(); - - return st; -} - -static void __exit rsxx_core_cleanup(void) -{ - pci_unregister_driver(&rsxx_pci_driver); - rsxx_creg_cleanup(); - rsxx_dma_cleanup(); - rsxx_dev_cleanup(); -} - -module_init(rsxx_core_init); -module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c deleted file mode 100644 index 60ecd3f7cbd2..000000000000 --- a/drivers/block/rsxx/cregs.c +++ /dev/null @@ -1,789 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -* Filename: cregs.c -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#include <linux/completion.h> -#include <linux/slab.h> - -#include "rsxx_priv.h" - -#define CREG_TIMEOUT_MSEC 10000 - -typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, - struct creg_cmd *cmd, - int st); - -struct creg_cmd { - struct list_head list; - creg_cmd_cb cb; - void *cb_private; - unsigned int op; - unsigned int addr; - int cnt8; - void *buf; - unsigned int stream; - unsigned int status; -}; - -static struct kmem_cache *creg_cmd_pool; - - -/*------------ Private Functions --------------*/ - -#if defined(__LITTLE_ENDIAN) -#define LITTLE_ENDIAN 1 -#elif defined(__BIG_ENDIAN) -#define LITTLE_ENDIAN 0 -#else -#error Unknown endianess!!! Aborting... -#endif - -static int copy_to_creg_data(struct rsxx_cardinfo *card, - int cnt8, - void *buf, - unsigned int stream) -{ - int i = 0; - u32 *data = buf; - - if (unlikely(card->eeh_state)) - return -EIO; - - for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { - /* - * Firmware implementation makes it necessary to byte swap on - * little endian processors. - */ - if (LITTLE_ENDIAN && stream) - iowrite32be(data[i], card->regmap + CREG_DATA(i)); - else - iowrite32(data[i], card->regmap + CREG_DATA(i)); - } - - return 0; -} - - -static int copy_from_creg_data(struct rsxx_cardinfo *card, - int cnt8, - void *buf, - unsigned int stream) -{ - int i = 0; - u32 *data = buf; - - if (unlikely(card->eeh_state)) - return -EIO; - - for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { - /* - * Firmware implementation makes it necessary to byte swap on - * little endian processors. - */ - if (LITTLE_ENDIAN && stream) - data[i] = ioread32be(card->regmap + CREG_DATA(i)); - else - data[i] = ioread32(card->regmap + CREG_DATA(i)); - } - - return 0; -} - -static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) -{ - int st; - - if (unlikely(card->eeh_state)) - return; - - iowrite32(cmd->addr, card->regmap + CREG_ADD); - iowrite32(cmd->cnt8, card->regmap + CREG_CNT); - - if (cmd->op == CREG_OP_WRITE) { - if (cmd->buf) { - st = copy_to_creg_data(card, cmd->cnt8, - cmd->buf, cmd->stream); - if (st) - return; - } - } - - if (unlikely(card->eeh_state)) - return; - - /* Setting the valid bit will kick off the command. */ - iowrite32(cmd->op, card->regmap + CREG_CMD); -} - -static void creg_kick_queue(struct rsxx_cardinfo *card) -{ - if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) - return; - - card->creg_ctrl.active = 1; - card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, - struct creg_cmd, list); - list_del(&card->creg_ctrl.active_cmd->list); - card->creg_ctrl.q_depth--; - - /* - * We have to set the timer before we push the new command. Otherwise, - * we could create a race condition that would occur if the timer - * was not canceled, and expired after the new command was pushed, - * but before the command was issued to hardware. - */ - mod_timer(&card->creg_ctrl.cmd_timer, - jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); - - creg_issue_cmd(card, card->creg_ctrl.active_cmd); -} - -static int creg_queue_cmd(struct rsxx_cardinfo *card, - unsigned int op, - unsigned int addr, - unsigned int cnt8, - void *buf, - int stream, - creg_cmd_cb callback, - void *cb_private) -{ - struct creg_cmd *cmd; - - /* Don't queue stuff up if we're halted. */ - if (unlikely(card->halt)) - return -EINVAL; - - if (card->creg_ctrl.reset) - return -EAGAIN; - - if (cnt8 > MAX_CREG_DATA8) - return -EINVAL; - - cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - INIT_LIST_HEAD(&cmd->list); - - cmd->op = op; - cmd->addr = addr; - cmd->cnt8 = cnt8; - cmd->buf = buf; - cmd->stream = stream; - cmd->cb = callback; - cmd->cb_private = cb_private; - cmd->status = 0; - - spin_lock_bh(&card->creg_ctrl.lock); - list_add_tail(&cmd->list, &card->creg_ctrl.queue); - card->creg_ctrl.q_depth++; - creg_kick_queue(card); - spin_unlock_bh(&card->creg_ctrl.lock); - - return 0; -} - -static void creg_cmd_timed_out(struct timer_list *t) -{ - struct rsxx_cardinfo *card = from_timer(card, t, creg_ctrl.cmd_timer); - struct creg_cmd *cmd; - - spin_lock(&card->creg_ctrl.lock); - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - spin_unlock(&card->creg_ctrl.lock); - - if (cmd == NULL) { - card->creg_ctrl.creg_stats.creg_timeout++; - dev_warn(CARD_TO_DEV(card), - "No active command associated with timeout!\n"); - return; - } - - if (cmd->cb) - cmd->cb(card, cmd, -ETIMEDOUT); - - kmem_cache_free(creg_cmd_pool, cmd); - - - spin_lock(&card->creg_ctrl.lock); - card->creg_ctrl.active = 0; - creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); -} - - -static void creg_cmd_done(struct work_struct *work) -{ - struct rsxx_cardinfo *card; - struct creg_cmd *cmd; - int st = 0; - - card = container_of(work, struct rsxx_cardinfo, - creg_ctrl.done_work); - - /* - * The timer could not be cancelled for some reason, - * race to pop the active command. - */ - if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) - card->creg_ctrl.creg_stats.failed_cancel_timer++; - - spin_lock_bh(&card->creg_ctrl.lock); - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - spin_unlock_bh(&card->creg_ctrl.lock); - - if (cmd == NULL) { - dev_err(CARD_TO_DEV(card), - "Spurious creg interrupt!\n"); - return; - } - - card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); - cmd->status = card->creg_ctrl.creg_stats.stat; - if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { - dev_err(CARD_TO_DEV(card), - "Invalid status on creg command\n"); - /* - * At this point we're probably reading garbage from HW. Don't - * do anything else that could mess up the system and let - * the sync function return an error. - */ - st = -EIO; - goto creg_done; - } else if (cmd->status & CREG_STAT_ERROR) { - st = -EIO; - } - - if (cmd->op == CREG_OP_READ) { - unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); - - /* Paranoid Sanity Checks */ - if (!cmd->buf) { - dev_err(CARD_TO_DEV(card), - "Buffer not given for read.\n"); - st = -EIO; - goto creg_done; - } - if (cnt8 != cmd->cnt8) { - dev_err(CARD_TO_DEV(card), - "count mismatch\n"); - st = -EIO; - goto creg_done; - } - - st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); - } - -creg_done: - if (cmd->cb) - cmd->cb(card, cmd, st); - - kmem_cache_free(creg_cmd_pool, cmd); - - spin_lock_bh(&card->creg_ctrl.lock); - card->creg_ctrl.active = 0; - creg_kick_queue(card); - spin_unlock_bh(&card->creg_ctrl.lock); -} - -static void creg_reset(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd = NULL; - struct creg_cmd *tmp; - unsigned long flags; - - /* - * mutex_trylock is used here because if reset_lock is taken then a - * reset is already happening. So, we can just go ahead and return. - */ - if (!mutex_trylock(&card->creg_ctrl.reset_lock)) - return; - - card->creg_ctrl.reset = 1; - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); - spin_unlock_irqrestore(&card->irq_lock, flags); - - dev_warn(CARD_TO_DEV(card), - "Resetting creg interface for recovery\n"); - - /* Cancel outstanding commands */ - spin_lock_bh(&card->creg_ctrl.lock); - list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { - list_del(&cmd->list); - card->creg_ctrl.q_depth--; - if (cmd->cb) - cmd->cb(card, cmd, -ECANCELED); - kmem_cache_free(creg_cmd_pool, cmd); - } - - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - if (cmd) { - if (timer_pending(&card->creg_ctrl.cmd_timer)) - del_timer_sync(&card->creg_ctrl.cmd_timer); - - if (cmd->cb) - cmd->cb(card, cmd, -ECANCELED); - kmem_cache_free(creg_cmd_pool, cmd); - - card->creg_ctrl.active = 0; - } - spin_unlock_bh(&card->creg_ctrl.lock); - - card->creg_ctrl.reset = 0; - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); - spin_unlock_irqrestore(&card->irq_lock, flags); - - mutex_unlock(&card->creg_ctrl.reset_lock); -} - -/* Used for synchronous accesses */ -struct creg_completion { - struct completion *cmd_done; - int st; - u32 creg_status; -}; - -static void creg_cmd_done_cb(struct rsxx_cardinfo *card, - struct creg_cmd *cmd, - int st) -{ - struct creg_completion *cmd_completion; - - cmd_completion = cmd->cb_private; - BUG_ON(!cmd_completion); - - cmd_completion->st = st; - cmd_completion->creg_status = cmd->status; - complete(cmd_completion->cmd_done); -} - -static int __issue_creg_rw(struct rsxx_cardinfo *card, - unsigned int op, - unsigned int addr, - unsigned int cnt8, - void *buf, - int stream, - unsigned int *hw_stat) -{ - DECLARE_COMPLETION_ONSTACK(cmd_done); - struct creg_completion completion; - unsigned long timeout; - int st; - - completion.cmd_done = &cmd_done; - completion.st = 0; - completion.creg_status = 0; - - st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, - &completion); - if (st) - return st; - - /* - * This timeout is necessary for unresponsive hardware. The additional - * 20 seconds to used to guarantee that each cregs requests has time to - * complete. - */ - timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC * - card->creg_ctrl.q_depth + 20000); - - /* - * The creg interface is guaranteed to complete. It has a timeout - * mechanism that will kick in if hardware does not respond. - */ - st = wait_for_completion_timeout(completion.cmd_done, timeout); - if (st == 0) { - /* - * This is really bad, because the kernel timer did not - * expire and notify us of a timeout! - */ - dev_crit(CARD_TO_DEV(card), - "cregs timer failed\n"); - creg_reset(card); - return -EIO; - } - - *hw_stat = completion.creg_status; - - if (completion.st) { - /* - * This read is needed to verify that there has not been any - * extreme errors that might have occurred, i.e. EEH. The - * function iowrite32 will not detect EEH errors, so it is - * necessary that we recover if such an error is the reason - * for the timeout. This is a dummy read. - */ - ioread32(card->regmap + SCRATCH); - - dev_warn(CARD_TO_DEV(card), - "creg command failed(%d x%08x)\n", - completion.st, addr); - return completion.st; - } - - return 0; -} - -static int issue_creg_rw(struct rsxx_cardinfo *card, - u32 addr, - unsigned int size8, - void *data, - int stream, - int read) -{ - unsigned int hw_stat; - unsigned int xfer; - unsigned int op; - int st; - - op = read ? CREG_OP_READ : CREG_OP_WRITE; - - do { - xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); - - st = __issue_creg_rw(card, op, addr, xfer, - data, stream, &hw_stat); - if (st) - return st; - - data = (char *)data + xfer; - addr += xfer; - size8 -= xfer; - } while (size8); - - return 0; -} - -/* ---------------------------- Public API ---------------------------------- */ -int rsxx_creg_write(struct rsxx_cardinfo *card, - u32 addr, - unsigned int size8, - void *data, - int byte_stream) -{ - return issue_creg_rw(card, addr, size8, data, byte_stream, 0); -} - -int rsxx_creg_read(struct rsxx_cardinfo *card, - u32 addr, - unsigned int size8, - void *data, - int byte_stream) -{ - return issue_creg_rw(card, addr, size8, data, byte_stream, 1); -} - -int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) -{ - return rsxx_creg_read(card, CREG_ADD_CARD_STATE, - sizeof(*state), state, 0); -} - -int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) -{ - unsigned int size; - int st; - - st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, - sizeof(size), &size, 0); - if (st) - return st; - - *size8 = (u64)size * RSXX_HW_BLK_SIZE; - return 0; -} - -int rsxx_get_num_targets(struct rsxx_cardinfo *card, - unsigned int *n_targets) -{ - return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, - sizeof(*n_targets), n_targets, 0); -} - -int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, - u32 *capabilities) -{ - return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, - sizeof(*capabilities), capabilities, 0); -} - -int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) -{ - return rsxx_creg_write(card, CREG_ADD_CARD_CMD, - sizeof(cmd), &cmd, 0); -} - - -/*----------------- HW Log Functions -------------------*/ -static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) -{ - static char level; - - /* - * New messages start with "<#>", where # is the log level. Messages - * that extend past the log buffer will use the previous level - */ - if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { - level = str[1]; - str += 3; /* Skip past the log level. */ - len -= 3; - } - - switch (level) { - case '0': - dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '1': - dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '2': - dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '3': - dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '4': - dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '5': - dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '6': - dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - case '7': - dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - default: - dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); - break; - } -} - -/* - * The substrncpy function copies the src string (which includes the - * terminating '\0' character), up to the count into the dest pointer. - * Returns the number of bytes copied to dest. - */ -static int substrncpy(char *dest, const char *src, int count) -{ - int max_cnt = count; - - while (count) { - count--; - *dest = *src; - if (*dest == '\0') - break; - src++; - dest++; - } - return max_cnt - count; -} - - -static void read_hw_log_done(struct rsxx_cardinfo *card, - struct creg_cmd *cmd, - int st) -{ - char *buf; - char *log_str; - int cnt; - int len; - int off; - - buf = cmd->buf; - off = 0; - - /* Failed getting the log message */ - if (st) - return; - - while (off < cmd->cnt8) { - log_str = &card->log.buf[card->log.buf_len]; - cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); - len = substrncpy(log_str, &buf[off], cnt); - - off += len; - card->log.buf_len += len; - - /* - * Flush the log if we've hit the end of a message or if we've - * run out of buffer space. - */ - if ((log_str[len - 1] == '\0') || - (card->log.buf_len == LOG_BUF_SIZE8)) { - if (card->log.buf_len != 1) /* Don't log blank lines. */ - hw_log_msg(card, card->log.buf, - card->log.buf_len); - card->log.buf_len = 0; - } - - } - - if (cmd->status & CREG_STAT_LOG_PENDING) - rsxx_read_hw_log(card); -} - -int rsxx_read_hw_log(struct rsxx_cardinfo *card) -{ - int st; - - st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, - sizeof(card->log.tmp), card->log.tmp, - 1, read_hw_log_done, NULL); - if (st) - dev_err(CARD_TO_DEV(card), - "Failed getting log text\n"); - - return st; -} - -/*-------------- IOCTL REG Access ------------------*/ -static int issue_reg_cmd(struct rsxx_cardinfo *card, - struct rsxx_reg_access *cmd, - int read) -{ - unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; - - return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, - cmd->stream, &cmd->stat); -} - -int rsxx_reg_access(struct rsxx_cardinfo *card, - struct rsxx_reg_access __user *ucmd, - int read) -{ - struct rsxx_reg_access cmd; - int st; - - st = copy_from_user(&cmd, ucmd, sizeof(cmd)); - if (st) - return -EFAULT; - - if (cmd.cnt > RSXX_MAX_REG_CNT) - return -EFAULT; - - st = issue_reg_cmd(card, &cmd, read); - if (st) - return st; - - st = put_user(cmd.stat, &ucmd->stat); - if (st) - return -EFAULT; - - if (read) { - st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); - if (st) - return -EFAULT; - } - - return 0; -} - -void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd = NULL; - - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - - if (cmd) { - del_timer_sync(&card->creg_ctrl.cmd_timer); - - spin_lock_bh(&card->creg_ctrl.lock); - list_add(&cmd->list, &card->creg_ctrl.queue); - card->creg_ctrl.q_depth++; - card->creg_ctrl.active = 0; - spin_unlock_bh(&card->creg_ctrl.lock); - } -} - -void rsxx_kick_creg_queue(struct rsxx_cardinfo *card) -{ - spin_lock_bh(&card->creg_ctrl.lock); - if (!list_empty(&card->creg_ctrl.queue)) - creg_kick_queue(card); - spin_unlock_bh(&card->creg_ctrl.lock); -} - -/*------------ Initialization & Setup --------------*/ -int rsxx_creg_setup(struct rsxx_cardinfo *card) -{ - card->creg_ctrl.active_cmd = NULL; - - card->creg_ctrl.creg_wq = - create_singlethread_workqueue(DRIVER_NAME"_creg"); - if (!card->creg_ctrl.creg_wq) - return -ENOMEM; - - INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); - mutex_init(&card->creg_ctrl.reset_lock); - INIT_LIST_HEAD(&card->creg_ctrl.queue); - spin_lock_init(&card->creg_ctrl.lock); - timer_setup(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, 0); - - return 0; -} - -void rsxx_creg_destroy(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd; - struct creg_cmd *tmp; - int cnt = 0; - - /* Cancel outstanding commands */ - spin_lock_bh(&card->creg_ctrl.lock); - list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { - list_del(&cmd->list); - if (cmd->cb) - cmd->cb(card, cmd, -ECANCELED); - kmem_cache_free(creg_cmd_pool, cmd); - cnt++; - } - - if (cnt) - dev_info(CARD_TO_DEV(card), - "Canceled %d queue creg commands\n", cnt); - - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - if (cmd) { - if (timer_pending(&card->creg_ctrl.cmd_timer)) - del_timer_sync(&card->creg_ctrl.cmd_timer); - - if (cmd->cb) - cmd->cb(card, cmd, -ECANCELED); - dev_info(CARD_TO_DEV(card), - "Canceled active creg command\n"); - kmem_cache_free(creg_cmd_pool, cmd); - } - spin_unlock_bh(&card->creg_ctrl.lock); - - cancel_work_sync(&card->creg_ctrl.done_work); -} - - -int rsxx_creg_init(void) -{ - creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); - if (!creg_cmd_pool) - return -ENOMEM; - - return 0; -} - -void rsxx_creg_cleanup(void) -{ - kmem_cache_destroy(creg_cmd_pool); -} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c deleted file mode 100644 index dd33f1bdf3b8..000000000000 --- a/drivers/block/rsxx/dev.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -* Filename: dev.c -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/slab.h> - -#include <linux/hdreg.h> -#include <linux/genhd.h> -#include <linux/blkdev.h> -#include <linux/bio.h> - -#include <linux/fs.h> - -#include "rsxx_priv.h" - -static unsigned int blkdev_minors = 64; -module_param(blkdev_minors, uint, 0444); -MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); - -/* - * For now I'm making this tweakable in case any applications hit this limit. - * If you see a "bio too big" error in the log you will need to raise this - * value. - */ -static unsigned int blkdev_max_hw_sectors = 1024; -module_param(blkdev_max_hw_sectors, uint, 0444); -MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); - -static unsigned int enable_blkdev = 1; -module_param(enable_blkdev , uint, 0444); -MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); - - -struct rsxx_bio_meta { - struct bio *bio; - atomic_t pending_dmas; - atomic_t error; - unsigned long start_time; -}; - -static struct kmem_cache *bio_meta_pool; - -static void rsxx_submit_bio(struct bio *bio); - -/*----------------- Block Device Operations -----------------*/ -static int rsxx_blkdev_ioctl(struct block_device *bdev, - fmode_t mode, - unsigned int cmd, - unsigned long arg) -{ - struct rsxx_cardinfo *card = bdev->bd_disk->private_data; - - switch (cmd) { - case RSXX_GETREG: - return rsxx_reg_access(card, (void __user *)arg, 1); - case RSXX_SETREG: - return rsxx_reg_access(card, (void __user *)arg, 0); - } - - return -ENOTTY; -} - -static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct rsxx_cardinfo *card = bdev->bd_disk->private_data; - u64 blocks = card->size8 >> 9; - - /* - * get geometry: Fake it. I haven't found any drivers that set - * geo->start, so we won't either. - */ - if (card->size8) { - geo->heads = 64; - geo->sectors = 16; - do_div(blocks, (geo->heads * geo->sectors)); - geo->cylinders = blocks; - } else { - geo->heads = 0; - geo->sectors = 0; - geo->cylinders = 0; - } - return 0; -} - -static const struct block_device_operations rsxx_fops = { - .owner = THIS_MODULE, - .submit_bio = rsxx_submit_bio, - .getgeo = rsxx_getgeo, - .ioctl = rsxx_blkdev_ioctl, -}; - -static void bio_dma_done_cb(struct rsxx_cardinfo *card, - void *cb_data, - unsigned int error) -{ - struct rsxx_bio_meta *meta = cb_data; - - if (error) - atomic_set(&meta->error, 1); - - if (atomic_dec_and_test(&meta->pending_dmas)) { - if (!card->eeh_state && card->gendisk) - bio_end_io_acct(meta->bio, meta->start_time); - - if (atomic_read(&meta->error)) - bio_io_error(meta->bio); - else - bio_endio(meta->bio); - kmem_cache_free(bio_meta_pool, meta); - } -} - -static void rsxx_submit_bio(struct bio *bio) -{ - struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data; - struct rsxx_bio_meta *bio_meta; - blk_status_t st = BLK_STS_IOERR; - - blk_queue_split(&bio); - - might_sleep(); - - if (!card) - goto req_err; - - if (bio_end_sector(bio) > get_capacity(card->gendisk)) - goto req_err; - - if (unlikely(card->halt)) - goto req_err; - - if (unlikely(card->dma_fault)) - goto req_err; - - if (bio->bi_iter.bi_size == 0) { - dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); - goto req_err; - } - - bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); - if (!bio_meta) { - st = BLK_STS_RESOURCE; - goto req_err; - } - - bio_meta->bio = bio; - atomic_set(&bio_meta->error, 0); - atomic_set(&bio_meta->pending_dmas, 0); - - if (!unlikely(card->halt)) - bio_meta->start_time = bio_start_io_acct(bio); - - dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", - bio_data_dir(bio) ? 'W' : 'R', bio_meta, - (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size); - - st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, - bio_dma_done_cb, bio_meta); - if (st) - goto queue_err; - - return; - -queue_err: - kmem_cache_free(bio_meta_pool, bio_meta); -req_err: - if (st) - bio->bi_status = st; - bio_endio(bio); -} - -/*----------------- Device Setup -------------------*/ -static bool rsxx_discard_supported(struct rsxx_cardinfo *card) -{ - unsigned char pci_rev; - - pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); - - return (pci_rev >= RSXX_DISCARD_SUPPORT); -} - -int rsxx_attach_dev(struct rsxx_cardinfo *card) -{ - int err = 0; - - mutex_lock(&card->dev_lock); - - /* The block device requires the stripe size from the config. */ - if (enable_blkdev) { - if (card->config_valid) - set_capacity(card->gendisk, card->size8 >> 9); - else - set_capacity(card->gendisk, 0); - err = device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); - if (err == 0) - card->bdev_attached = 1; - } - - mutex_unlock(&card->dev_lock); - - if (err) - blk_cleanup_disk(card->gendisk); - - return err; -} - -void rsxx_detach_dev(struct rsxx_cardinfo *card) -{ - mutex_lock(&card->dev_lock); - - if (card->bdev_attached) { - del_gendisk(card->gendisk); - card->bdev_attached = 0; - } - - mutex_unlock(&card->dev_lock); -} - -int rsxx_setup_dev(struct rsxx_cardinfo *card) -{ - unsigned short blk_size; - - mutex_init(&card->dev_lock); - - if (!enable_blkdev) - return 0; - - card->major = register_blkdev(0, DRIVER_NAME); - if (card->major < 0) { - dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); - return -ENOMEM; - } - - card->gendisk = blk_alloc_disk(blkdev_minors); - if (!card->gendisk) { - dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); - unregister_blkdev(card->major, DRIVER_NAME); - return -ENOMEM; - } - - if (card->config_valid) { - blk_size = card->config.data.block_size; - blk_queue_dma_alignment(card->gendisk->queue, blk_size - 1); - blk_queue_logical_block_size(card->gendisk->queue, blk_size); - } - - blk_queue_max_hw_sectors(card->gendisk->queue, blkdev_max_hw_sectors); - blk_queue_physical_block_size(card->gendisk->queue, RSXX_HW_BLK_SIZE); - - blk_queue_flag_set(QUEUE_FLAG_NONROT, card->gendisk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->gendisk->queue); - if (rsxx_discard_supported(card)) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->gendisk->queue); - blk_queue_max_discard_sectors(card->gendisk->queue, - RSXX_HW_BLK_SIZE >> 9); - card->gendisk->queue->limits.discard_granularity = - RSXX_HW_BLK_SIZE; - card->gendisk->queue->limits.discard_alignment = - RSXX_HW_BLK_SIZE; - } - - snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), - "rsxx%d", card->disk_id); - card->gendisk->major = card->major; - card->gendisk->minors = blkdev_minors; - card->gendisk->fops = &rsxx_fops; - card->gendisk->private_data = card; - - return 0; -} - -void rsxx_destroy_dev(struct rsxx_cardinfo *card) -{ - if (!enable_blkdev) - return; - - blk_cleanup_disk(card->gendisk); - card->gendisk = NULL; - unregister_blkdev(card->major, DRIVER_NAME); -} - -int rsxx_dev_init(void) -{ - bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); - if (!bio_meta_pool) - return -ENOMEM; - - return 0; -} - -void rsxx_dev_cleanup(void) -{ - kmem_cache_destroy(bio_meta_pool); -} - - diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c deleted file mode 100644 index ed182f3dd054..000000000000 --- a/drivers/block/rsxx/dma.c +++ /dev/null @@ -1,1085 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -* Filename: dma.c -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#include <linux/slab.h> -#include "rsxx_priv.h" - -struct rsxx_dma { - struct list_head list; - u8 cmd; - unsigned int laddr; /* Logical address */ - struct { - u32 off; - u32 cnt; - } sub_page; - dma_addr_t dma_addr; - struct page *page; - unsigned int pg_off; /* Page Offset */ - rsxx_dma_cb cb; - void *cb_data; -}; - -/* This timeout is used to detect a stalled DMA channel */ -#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) - -struct hw_status { - u8 status; - u8 tag; - __le16 count; - __le32 _rsvd2; - __le64 _rsvd3; -} __packed; - -enum rsxx_dma_status { - DMA_SW_ERR = 0x1, - DMA_HW_FAULT = 0x2, - DMA_CANCELLED = 0x4, -}; - -struct hw_cmd { - u8 command; - u8 tag; - u8 _rsvd; - u8 sub_page; /* Bit[0:2]: 512byte offset */ - /* Bit[4:6]: 512byte count */ - __le32 device_addr; - __le64 host_addr; -} __packed; - -enum rsxx_hw_cmd { - HW_CMD_BLK_DISCARD = 0x70, - HW_CMD_BLK_WRITE = 0x80, - HW_CMD_BLK_READ = 0xC0, - HW_CMD_BLK_RECON_READ = 0xE0, -}; - -enum rsxx_hw_status { - HW_STATUS_CRC = 0x01, - HW_STATUS_HARD_ERR = 0x02, - HW_STATUS_SOFT_ERR = 0x04, - HW_STATUS_FAULT = 0x08, -}; - -static struct kmem_cache *rsxx_dma_pool; - -struct dma_tracker { - int next_tag; - struct rsxx_dma *dma; -}; - -struct dma_tracker_list { - spinlock_t lock; - int head; - struct dma_tracker list[]; -}; - - -/*----------------- Misc Utility Functions -------------------*/ -static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) -{ - unsigned long long tgt_addr8; - - tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & - card->_stripe.upper_mask) | - ((addr8) & card->_stripe.lower_mask); - do_div(tgt_addr8, RSXX_HW_BLK_SIZE); - return tgt_addr8; -} - -static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) -{ - unsigned int tgt; - - tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; - - return tgt; -} - -void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) -{ - /* Reset all DMA Command/Status Queues */ - iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); -} - -static unsigned int get_dma_size(struct rsxx_dma *dma) -{ - if (dma->sub_page.cnt) - return dma->sub_page.cnt << 9; - else - return RSXX_HW_BLK_SIZE; -} - - -/*----------------- DMA Tracker -------------------*/ -static void set_tracker_dma(struct dma_tracker_list *trackers, - int tag, - struct rsxx_dma *dma) -{ - trackers->list[tag].dma = dma; -} - -static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, - int tag) -{ - return trackers->list[tag].dma; -} - -static int pop_tracker(struct dma_tracker_list *trackers) -{ - int tag; - - spin_lock(&trackers->lock); - tag = trackers->head; - if (tag != -1) { - trackers->head = trackers->list[tag].next_tag; - trackers->list[tag].next_tag = -1; - } - spin_unlock(&trackers->lock); - - return tag; -} - -static void push_tracker(struct dma_tracker_list *trackers, int tag) -{ - spin_lock(&trackers->lock); - trackers->list[tag].next_tag = trackers->head; - trackers->head = tag; - trackers->list[tag].dma = NULL; - spin_unlock(&trackers->lock); -} - - -/*----------------- Interrupt Coalescing -------------*/ -/* - * Interrupt Coalescing Register Format: - * Interrupt Timer (64ns units) [15:0] - * Interrupt Count [24:16] - * Reserved [31:25] -*/ -#define INTR_COAL_LATENCY_MASK (0x0000ffff) - -#define INTR_COAL_COUNT_SHIFT 16 -#define INTR_COAL_COUNT_BITS 9 -#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ - INTR_COAL_COUNT_SHIFT) -#define INTR_COAL_LATENCY_UNITS_NS 64 - - -static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) -{ - u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; - - if (mode == RSXX_INTR_COAL_DISABLED) - return 0; - - return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | - (latency_units & INTR_COAL_LATENCY_MASK); - -} - -static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) -{ - int i; - u32 q_depth = 0; - u32 intr_coal; - - if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE || - unlikely(card->eeh_state)) - return; - - for (i = 0; i < card->n_targets; i++) - q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); - - intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, - q_depth / 2, - card->config.data.intr_coal.latency); - iowrite32(intr_coal, card->regmap + INTR_COAL); -} - -/*----------------- RSXX DMA Handling -------------------*/ -static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma) -{ - if (dma->cmd != HW_CMD_BLK_DISCARD) { - if (!dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { - dma_unmap_page(&ctrl->card->dev->dev, dma->dma_addr, - get_dma_size(dma), - dma->cmd == HW_CMD_BLK_WRITE ? - DMA_TO_DEVICE : - DMA_FROM_DEVICE); - } - } - - kmem_cache_free(rsxx_dma_pool, dma); -} - -static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, - struct rsxx_dma *dma, - unsigned int status) -{ - if (status & DMA_SW_ERR) - ctrl->stats.dma_sw_err++; - if (status & DMA_HW_FAULT) - ctrl->stats.dma_hw_fault++; - if (status & DMA_CANCELLED) - ctrl->stats.dma_cancelled++; - - if (dma->cb) - dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0); - - rsxx_free_dma(ctrl, dma); -} - -int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, - struct list_head *q, unsigned int done) -{ - struct rsxx_dma *dma; - struct rsxx_dma *tmp; - int cnt = 0; - - list_for_each_entry_safe(dma, tmp, q, list) { - list_del(&dma->list); - if (done & COMPLETE_DMA) - rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); - else - rsxx_free_dma(ctrl, dma); - cnt++; - } - - return cnt; -} - -static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, - struct rsxx_dma *dma) -{ - /* - * Requeued DMAs go to the front of the queue so they are issued - * first. - */ - spin_lock_bh(&ctrl->queue_lock); - ctrl->stats.sw_q_depth++; - list_add(&dma->list, &ctrl->queue); - spin_unlock_bh(&ctrl->queue_lock); -} - -static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, - struct rsxx_dma *dma, - u8 hw_st) -{ - unsigned int status = 0; - int requeue_cmd = 0; - - dev_dbg(CARD_TO_DEV(ctrl->card), - "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", - dma->cmd, dma->laddr, hw_st); - - if (hw_st & HW_STATUS_CRC) - ctrl->stats.crc_errors++; - if (hw_st & HW_STATUS_HARD_ERR) - ctrl->stats.hard_errors++; - if (hw_st & HW_STATUS_SOFT_ERR) - ctrl->stats.soft_errors++; - - switch (dma->cmd) { - case HW_CMD_BLK_READ: - if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { - if (ctrl->card->scrub_hard) { - dma->cmd = HW_CMD_BLK_RECON_READ; - requeue_cmd = 1; - ctrl->stats.reads_retried++; - } else { - status |= DMA_HW_FAULT; - ctrl->stats.reads_failed++; - } - } else if (hw_st & HW_STATUS_FAULT) { - status |= DMA_HW_FAULT; - ctrl->stats.reads_failed++; - } - - break; - case HW_CMD_BLK_RECON_READ: - if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { - /* Data could not be reconstructed. */ - status |= DMA_HW_FAULT; - ctrl->stats.reads_failed++; - } - - break; - case HW_CMD_BLK_WRITE: - status |= DMA_HW_FAULT; - ctrl->stats.writes_failed++; - - break; - case HW_CMD_BLK_DISCARD: - status |= DMA_HW_FAULT; - ctrl->stats.discards_failed++; - - break; - default: - dev_err(CARD_TO_DEV(ctrl->card), - "Unknown command in DMA!(cmd: x%02x " - "laddr x%08x st: x%02x\n", - dma->cmd, dma->laddr, hw_st); - status |= DMA_SW_ERR; - - break; - } - - if (requeue_cmd) - rsxx_requeue_dma(ctrl, dma); - else - rsxx_complete_dma(ctrl, dma, status); -} - -static void dma_engine_stalled(struct timer_list *t) -{ - struct rsxx_dma_ctrl *ctrl = from_timer(ctrl, t, activity_timer); - int cnt; - - if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || - unlikely(ctrl->card->eeh_state)) - return; - - if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { - /* - * The dma engine was stalled because the SW_CMD_IDX write - * was lost. Issue it again to recover. - */ - dev_warn(CARD_TO_DEV(ctrl->card), - "SW_CMD_IDX write was lost, re-writing...\n"); - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - mod_timer(&ctrl->activity_timer, - jiffies + DMA_ACTIVITY_TIMEOUT); - } else { - dev_warn(CARD_TO_DEV(ctrl->card), - "DMA channel %d has stalled, faulting interface.\n", - ctrl->id); - ctrl->card->dma_fault = 1; - - /* Clean up the DMA queue */ - spin_lock(&ctrl->queue_lock); - cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA); - spin_unlock(&ctrl->queue_lock); - - cnt += rsxx_dma_cancel(ctrl); - - if (cnt) - dev_info(CARD_TO_DEV(ctrl->card), - "Freed %d queued DMAs on channel %d\n", - cnt, ctrl->id); - } -} - -static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl) -{ - struct rsxx_dma *dma; - int tag; - int cmds_pending = 0; - struct hw_cmd *hw_cmd_buf; - int dir; - - hw_cmd_buf = ctrl->cmd.buf; - - if (unlikely(ctrl->card->halt) || - unlikely(ctrl->card->eeh_state)) - return; - - while (1) { - spin_lock_bh(&ctrl->queue_lock); - if (list_empty(&ctrl->queue)) { - spin_unlock_bh(&ctrl->queue_lock); - break; - } - spin_unlock_bh(&ctrl->queue_lock); - - tag = pop_tracker(ctrl->trackers); - if (tag == -1) - break; - - spin_lock_bh(&ctrl->queue_lock); - dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); - list_del(&dma->list); - ctrl->stats.sw_q_depth--; - spin_unlock_bh(&ctrl->queue_lock); - - /* - * This will catch any DMAs that slipped in right before the - * fault, but was queued after all the other DMAs were - * cancelled. - */ - if (unlikely(ctrl->card->dma_fault)) { - push_tracker(ctrl->trackers, tag); - rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); - continue; - } - - if (dma->cmd != HW_CMD_BLK_DISCARD) { - if (dma->cmd == HW_CMD_BLK_WRITE) - dir = DMA_TO_DEVICE; - else - dir = DMA_FROM_DEVICE; - - /* - * The function dma_map_page is placed here because we - * can only, by design, issue up to 255 commands to the - * hardware at one time per DMA channel. So the maximum - * amount of mapped memory would be 255 * 4 channels * - * 4096 Bytes which is less than 2GB, the limit of a x8 - * Non-HWWD PCIe slot. This way the dma_map_page - * function should never fail because of a lack of - * mappable memory. - */ - dma->dma_addr = dma_map_page(&ctrl->card->dev->dev, dma->page, - dma->pg_off, dma->sub_page.cnt << 9, dir); - if (dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { - push_tracker(ctrl->trackers, tag); - rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); - continue; - } - } - - set_tracker_dma(ctrl->trackers, tag, dma); - hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; - hw_cmd_buf[ctrl->cmd.idx].tag = tag; - hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; - hw_cmd_buf[ctrl->cmd.idx].sub_page = - ((dma->sub_page.cnt & 0x7) << 4) | - (dma->sub_page.off & 0x7); - - hw_cmd_buf[ctrl->cmd.idx].device_addr = - cpu_to_le32(dma->laddr); - - hw_cmd_buf[ctrl->cmd.idx].host_addr = - cpu_to_le64(dma->dma_addr); - - dev_dbg(CARD_TO_DEV(ctrl->card), - "Issue DMA%d(laddr %d tag %d) to idx %d\n", - ctrl->id, dma->laddr, tag, ctrl->cmd.idx); - - ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; - cmds_pending++; - - if (dma->cmd == HW_CMD_BLK_WRITE) - ctrl->stats.writes_issued++; - else if (dma->cmd == HW_CMD_BLK_DISCARD) - ctrl->stats.discards_issued++; - else - ctrl->stats.reads_issued++; - } - - /* Let HW know we've queued commands. */ - if (cmds_pending) { - atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); - mod_timer(&ctrl->activity_timer, - jiffies + DMA_ACTIVITY_TIMEOUT); - - if (unlikely(ctrl->card->eeh_state)) { - del_timer_sync(&ctrl->activity_timer); - return; - } - - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - } -} - -static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl) -{ - struct rsxx_dma *dma; - unsigned long flags; - u16 count; - u8 status; - u8 tag; - struct hw_status *hw_st_buf; - - hw_st_buf = ctrl->status.buf; - - if (unlikely(ctrl->card->halt) || - unlikely(ctrl->card->dma_fault) || - unlikely(ctrl->card->eeh_state)) - return; - - count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); - - while (count == ctrl->e_cnt) { - /* - * The read memory-barrier is necessary to keep aggressive - * processors/optimizers (such as the PPC Apple G5) from - * reordering the following status-buffer tag & status read - * *before* the count read on subsequent iterations of the - * loop! - */ - rmb(); - - status = hw_st_buf[ctrl->status.idx].status; - tag = hw_st_buf[ctrl->status.idx].tag; - - dma = get_tracker_dma(ctrl->trackers, tag); - if (dma == NULL) { - spin_lock_irqsave(&ctrl->card->irq_lock, flags); - rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); - spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); - - dev_err(CARD_TO_DEV(ctrl->card), - "No tracker for tag %d " - "(idx %d id %d)\n", - tag, ctrl->status.idx, ctrl->id); - return; - } - - dev_dbg(CARD_TO_DEV(ctrl->card), - "Completing DMA%d" - "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", - ctrl->id, dma->laddr, tag, status, count, - ctrl->status.idx); - - atomic_dec(&ctrl->stats.hw_q_depth); - - mod_timer(&ctrl->activity_timer, - jiffies + DMA_ACTIVITY_TIMEOUT); - - if (status) - rsxx_handle_dma_error(ctrl, dma, status); - else - rsxx_complete_dma(ctrl, dma, 0); - - push_tracker(ctrl->trackers, tag); - - ctrl->status.idx = (ctrl->status.idx + 1) & - RSXX_CS_IDX_MASK; - ctrl->e_cnt++; - - count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); - } - - dma_intr_coal_auto_tune(ctrl->card); - - if (atomic_read(&ctrl->stats.hw_q_depth) == 0) - del_timer_sync(&ctrl->activity_timer); - - spin_lock_irqsave(&ctrl->card->irq_lock, flags); - rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); - spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); - - spin_lock_bh(&ctrl->queue_lock); - if (ctrl->stats.sw_q_depth) - queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); - spin_unlock_bh(&ctrl->queue_lock); -} - -static void rsxx_schedule_issue(struct work_struct *work) -{ - struct rsxx_dma_ctrl *ctrl; - - ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); - - mutex_lock(&ctrl->work_lock); - rsxx_issue_dmas(ctrl); - mutex_unlock(&ctrl->work_lock); -} - -static void rsxx_schedule_done(struct work_struct *work) -{ - struct rsxx_dma_ctrl *ctrl; - - ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); - - mutex_lock(&ctrl->work_lock); - rsxx_dma_done(ctrl); - mutex_unlock(&ctrl->work_lock); -} - -static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card, - struct list_head *q, - unsigned int laddr, - rsxx_dma_cb cb, - void *cb_data) -{ - struct rsxx_dma *dma; - - dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); - if (!dma) - return BLK_STS_RESOURCE; - - dma->cmd = HW_CMD_BLK_DISCARD; - dma->laddr = laddr; - dma->dma_addr = 0; - dma->sub_page.off = 0; - dma->sub_page.cnt = 0; - dma->page = NULL; - dma->pg_off = 0; - dma->cb = cb; - dma->cb_data = cb_data; - - dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); - - list_add_tail(&dma->list, q); - - return 0; -} - -static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card, - struct list_head *q, - int dir, - unsigned int dma_off, - unsigned int dma_len, - unsigned int laddr, - struct page *page, - unsigned int pg_off, - rsxx_dma_cb cb, - void *cb_data) -{ - struct rsxx_dma *dma; - - dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); - if (!dma) - return BLK_STS_RESOURCE; - - dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; - dma->laddr = laddr; - dma->sub_page.off = (dma_off >> 9); - dma->sub_page.cnt = (dma_len >> 9); - dma->page = page; - dma->pg_off = pg_off; - dma->cb = cb; - dma->cb_data = cb_data; - - dev_dbg(CARD_TO_DEV(card), - "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", - dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, - dma->sub_page.cnt, dma->page, dma->pg_off); - - /* Queue the DMA */ - list_add_tail(&dma->list, q); - - return 0; -} - -blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card, - struct bio *bio, - atomic_t *n_dmas, - rsxx_dma_cb cb, - void *cb_data) -{ - struct list_head dma_list[RSXX_MAX_TARGETS]; - struct bio_vec bvec; - struct bvec_iter iter; - unsigned long long addr8; - unsigned int laddr; - unsigned int bv_len; - unsigned int bv_off; - unsigned int dma_off; - unsigned int dma_len; - int dma_cnt[RSXX_MAX_TARGETS]; - int tgt; - blk_status_t st; - int i; - - addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */ - atomic_set(n_dmas, 0); - - for (i = 0; i < card->n_targets; i++) { - INIT_LIST_HEAD(&dma_list[i]); - dma_cnt[i] = 0; - } - - if (bio_op(bio) == REQ_OP_DISCARD) { - bv_len = bio->bi_iter.bi_size; - - while (bv_len > 0) { - tgt = rsxx_get_dma_tgt(card, addr8); - laddr = rsxx_addr8_to_laddr(addr8, card); - - st = rsxx_queue_discard(card, &dma_list[tgt], laddr, - cb, cb_data); - if (st) - goto bvec_err; - - dma_cnt[tgt]++; - atomic_inc(n_dmas); - addr8 += RSXX_HW_BLK_SIZE; - bv_len -= RSXX_HW_BLK_SIZE; - } - } else { - bio_for_each_segment(bvec, bio, iter) { - bv_len = bvec.bv_len; - bv_off = bvec.bv_offset; - - while (bv_len > 0) { - tgt = rsxx_get_dma_tgt(card, addr8); - laddr = rsxx_addr8_to_laddr(addr8, card); - dma_off = addr8 & RSXX_HW_BLK_MASK; - dma_len = min(bv_len, - RSXX_HW_BLK_SIZE - dma_off); - - st = rsxx_queue_dma(card, &dma_list[tgt], - bio_data_dir(bio), - dma_off, dma_len, - laddr, bvec.bv_page, - bv_off, cb, cb_data); - if (st) - goto bvec_err; - - dma_cnt[tgt]++; - atomic_inc(n_dmas); - addr8 += dma_len; - bv_off += dma_len; - bv_len -= dma_len; - } - } - } - - for (i = 0; i < card->n_targets; i++) { - if (!list_empty(&dma_list[i])) { - spin_lock_bh(&card->ctrl[i].queue_lock); - card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; - list_splice_tail(&dma_list[i], &card->ctrl[i].queue); - spin_unlock_bh(&card->ctrl[i].queue_lock); - - queue_work(card->ctrl[i].issue_wq, - &card->ctrl[i].issue_dma_work); - } - } - - return 0; - -bvec_err: - for (i = 0; i < card->n_targets; i++) - rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i], - FREE_DMA); - return st; -} - - -/*----------------- DMA Engine Initialization & Setup -------------------*/ -int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) -{ - ctrl->status.buf = dma_alloc_coherent(&dev->dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr, GFP_KERNEL); - ctrl->cmd.buf = dma_alloc_coherent(&dev->dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr, GFP_KERNEL); - if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) - return -ENOMEM; - - memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_LO); - iowrite32(upper_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_HI); - - memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); - iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); - - ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); - if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading status cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); - iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); - - ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); - if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - - return 0; -} - -static int rsxx_dma_ctrl_init(struct pci_dev *dev, - struct rsxx_dma_ctrl *ctrl) -{ - int i; - int st; - - memset(&ctrl->stats, 0, sizeof(ctrl->stats)); - - ctrl->trackers = vmalloc(struct_size(ctrl->trackers, list, - RSXX_MAX_OUTSTANDING_CMDS)); - if (!ctrl->trackers) - return -ENOMEM; - - ctrl->trackers->head = 0; - for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { - ctrl->trackers->list[i].next_tag = i + 1; - ctrl->trackers->list[i].dma = NULL; - } - ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; - spin_lock_init(&ctrl->trackers->lock); - - spin_lock_init(&ctrl->queue_lock); - mutex_init(&ctrl->work_lock); - INIT_LIST_HEAD(&ctrl->queue); - - timer_setup(&ctrl->activity_timer, dma_engine_stalled, 0); - - ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); - if (!ctrl->issue_wq) - return -ENOMEM; - - ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); - if (!ctrl->done_wq) - return -ENOMEM; - - INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue); - INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done); - - st = rsxx_hw_buffers_init(dev, ctrl); - if (st) - return st; - - return 0; -} - -static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, - unsigned int stripe_size8) -{ - if (!is_power_of_2(stripe_size8)) { - dev_err(CARD_TO_DEV(card), - "stripe_size is NOT a power of 2!\n"); - return -EINVAL; - } - - card->_stripe.lower_mask = stripe_size8 - 1; - - card->_stripe.upper_mask = ~(card->_stripe.lower_mask); - card->_stripe.upper_shift = ffs(card->n_targets) - 1; - - card->_stripe.target_mask = card->n_targets - 1; - card->_stripe.target_shift = ffs(stripe_size8) - 1; - - dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", - card->_stripe.lower_mask); - dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", - card->_stripe.upper_shift); - dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", - card->_stripe.upper_mask); - dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", - card->_stripe.target_mask); - dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", - card->_stripe.target_shift); - - return 0; -} - -int rsxx_dma_configure(struct rsxx_cardinfo *card) -{ - u32 intr_coal; - - intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, - card->config.data.intr_coal.count, - card->config.data.intr_coal.latency); - iowrite32(intr_coal, card->regmap + INTR_COAL); - - return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); -} - -int rsxx_dma_setup(struct rsxx_cardinfo *card) -{ - unsigned long flags; - int st; - int i; - - dev_info(CARD_TO_DEV(card), - "Initializing %d DMA targets\n", - card->n_targets); - - /* Regmap is divided up into 4K chunks. One for each DMA channel */ - for (i = 0; i < card->n_targets; i++) - card->ctrl[i].regmap = card->regmap + (i * 4096); - - card->dma_fault = 0; - - /* Reset the DMA queues */ - rsxx_dma_queue_reset(card); - - /************* Setup DMA Control *************/ - for (i = 0; i < card->n_targets; i++) { - st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); - if (st) - goto failed_dma_setup; - - card->ctrl[i].card = card; - card->ctrl[i].id = i; - } - - card->scrub_hard = 1; - - if (card->config_valid) - rsxx_dma_configure(card); - - /* Enable the interrupts after all setup has completed. */ - for (i = 0; i < card->n_targets; i++) { - spin_lock_irqsave(&card->irq_lock, flags); - rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); - spin_unlock_irqrestore(&card->irq_lock, flags); - } - - return 0; - -failed_dma_setup: - for (i = 0; i < card->n_targets; i++) { - struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; - - if (ctrl->issue_wq) { - destroy_workqueue(ctrl->issue_wq); - ctrl->issue_wq = NULL; - } - - if (ctrl->done_wq) { - destroy_workqueue(ctrl->done_wq); - ctrl->done_wq = NULL; - } - - vfree(ctrl->trackers); - - if (ctrl->status.buf) - dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, - ctrl->status.dma_addr); - if (ctrl->cmd.buf) - dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); - } - - return st; -} - -int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl) -{ - struct rsxx_dma *dma; - int i; - int cnt = 0; - - /* Clean up issued DMAs */ - for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { - dma = get_tracker_dma(ctrl->trackers, i); - if (dma) { - atomic_dec(&ctrl->stats.hw_q_depth); - rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); - push_tracker(ctrl->trackers, i); - cnt++; - } - } - - return cnt; -} - -void rsxx_dma_destroy(struct rsxx_cardinfo *card) -{ - struct rsxx_dma_ctrl *ctrl; - int i; - - for (i = 0; i < card->n_targets; i++) { - ctrl = &card->ctrl[i]; - - if (ctrl->issue_wq) { - destroy_workqueue(ctrl->issue_wq); - ctrl->issue_wq = NULL; - } - - if (ctrl->done_wq) { - destroy_workqueue(ctrl->done_wq); - ctrl->done_wq = NULL; - } - - if (timer_pending(&ctrl->activity_timer)) - del_timer_sync(&ctrl->activity_timer); - - /* Clean up the DMA queue */ - spin_lock_bh(&ctrl->queue_lock); - rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA); - spin_unlock_bh(&ctrl->queue_lock); - - rsxx_dma_cancel(ctrl); - - vfree(ctrl->trackers); - - dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, ctrl->status.dma_addr); - dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); - } -} - -int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) -{ - int i; - int j; - int cnt; - struct rsxx_dma *dma; - struct list_head *issued_dmas; - - issued_dmas = kcalloc(card->n_targets, sizeof(*issued_dmas), - GFP_KERNEL); - if (!issued_dmas) - return -ENOMEM; - - for (i = 0; i < card->n_targets; i++) { - INIT_LIST_HEAD(&issued_dmas[i]); - cnt = 0; - for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { - dma = get_tracker_dma(card->ctrl[i].trackers, j); - if (dma == NULL) - continue; - - if (dma->cmd == HW_CMD_BLK_WRITE) - card->ctrl[i].stats.writes_issued--; - else if (dma->cmd == HW_CMD_BLK_DISCARD) - card->ctrl[i].stats.discards_issued--; - else - card->ctrl[i].stats.reads_issued--; - - if (dma->cmd != HW_CMD_BLK_DISCARD) { - dma_unmap_page(&card->dev->dev, dma->dma_addr, - get_dma_size(dma), - dma->cmd == HW_CMD_BLK_WRITE ? - DMA_TO_DEVICE : - DMA_FROM_DEVICE); - } - - list_add_tail(&dma->list, &issued_dmas[i]); - push_tracker(card->ctrl[i].trackers, j); - cnt++; - } - - spin_lock_bh(&card->ctrl[i].queue_lock); - list_splice(&issued_dmas[i], &card->ctrl[i].queue); - - atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); - card->ctrl[i].stats.sw_q_depth += cnt; - card->ctrl[i].e_cnt = 0; - spin_unlock_bh(&card->ctrl[i].queue_lock); - } - - kfree(issued_dmas); - - return 0; -} - -int rsxx_dma_init(void) -{ - rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); - if (!rsxx_dma_pool) - return -ENOMEM; - - return 0; -} - - -void rsxx_dma_cleanup(void) -{ - kmem_cache_destroy(rsxx_dma_pool); -} - diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h deleted file mode 100644 index 4f84905a6fd2..000000000000 --- a/drivers/block/rsxx/rsxx.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* -* Filename: rsxx.h -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#ifndef __RSXX_H__ -#define __RSXX_H__ - -/*----------------- IOCTL Definitions -------------------*/ - -#define RSXX_MAX_DATA 8 - -struct rsxx_reg_access { - __u32 addr; - __u32 cnt; - __u32 stat; - __u32 stream; - __u32 data[RSXX_MAX_DATA]; -}; - -#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32))) - -#define RSXX_IOC_MAGIC 'r' - -#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) -#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) - -#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h deleted file mode 100644 index 2b79015f5849..000000000000 --- a/drivers/block/rsxx/rsxx_cfg.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* -* Filename: rsXX_cfg.h -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#ifndef __RSXX_CFG_H__ -#define __RSXX_CFG_H__ - -/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ -#include <linux/types.h> - -/* - * The card config version must match the driver's expected version. If it does - * not, the DMA interfaces will not be attached and the user will need to - * initialize/upgrade the card configuration using the card config utility. - */ -#define RSXX_CFG_VERSION 4 - -struct card_cfg_hdr { - __u32 version; - __u32 crc; -}; - -struct card_cfg_data { - __u32 block_size; - __u32 stripe_size; - __u32 vendor_id; - __u32 cache_order; - struct { - __u32 mode; /* Disabled, manual, auto-tune... */ - __u32 count; /* Number of intr to coalesce */ - __u32 latency;/* Max wait time (in ns) */ - } intr_coal; -}; - -struct rsxx_card_cfg { - struct card_cfg_hdr hdr; - struct card_cfg_data data; -}; - -/* Vendor ID Values */ -#define RSXX_VENDOR_ID_IBM 0 -#define RSXX_VENDOR_ID_DSI 1 -#define RSXX_VENDOR_COUNT 2 - -/* Interrupt Coalescing Values */ -#define RSXX_INTR_COAL_DISABLED 0 -#define RSXX_INTR_COAL_EXPLICIT 1 -#define RSXX_INTR_COAL_AUTO_TUNE 2 - - -#endif /* __RSXX_CFG_H__ */ - diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h deleted file mode 100644 index 26c320c0d924..000000000000 --- a/drivers/block/rsxx/rsxx_priv.h +++ /dev/null @@ -1,418 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* -* Filename: rsxx_priv.h -* -* Authors: Joshua Morris <josh.h.morris@us.ibm.com> -* Philip Kelleher <pjk1939@linux.vnet.ibm.com> -* -* (C) Copyright 2013 IBM Corporation -*/ - -#ifndef __RSXX_PRIV_H__ -#define __RSXX_PRIV_H__ - -#include <linux/semaphore.h> - -#include <linux/fs.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> -#include <linux/pci.h> -#include <linux/spinlock.h> -#include <linux/sysfs.h> -#include <linux/workqueue.h> -#include <linux/bio.h> -#include <linux/vmalloc.h> -#include <linux/timer.h> -#include <linux/ioctl.h> -#include <linux/delay.h> - -#include "rsxx.h" -#include "rsxx_cfg.h" - -struct proc_cmd; - -#define PCI_DEVICE_ID_FS70_FLASH 0x04A9 -#define PCI_DEVICE_ID_FS80_FLASH 0x04AA - -#define RS70_PCI_REV_SUPPORTED 4 - -#define DRIVER_NAME "rsxx" -#define DRIVER_VERSION "4.0.3.2516" - -/* Block size is 4096 */ -#define RSXX_HW_BLK_SHIFT 12 -#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) -#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) - -#define MAX_CREG_DATA8 32 -#define LOG_BUF_SIZE8 128 - -#define RSXX_MAX_OUTSTANDING_CMDS 255 -#define RSXX_CS_IDX_MASK 0xff - -#define STATUS_BUFFER_SIZE8 4096 -#define COMMAND_BUFFER_SIZE8 4096 - -#define RSXX_MAX_TARGETS 8 - -struct dma_tracker_list; - -/* DMA Command/Status Buffer structure */ -struct rsxx_cs_buffer { - dma_addr_t dma_addr; - void *buf; - u32 idx; -}; - -struct rsxx_dma_stats { - u32 crc_errors; - u32 hard_errors; - u32 soft_errors; - u32 writes_issued; - u32 writes_failed; - u32 reads_issued; - u32 reads_failed; - u32 reads_retried; - u32 discards_issued; - u32 discards_failed; - u32 done_rescheduled; - u32 issue_rescheduled; - u32 dma_sw_err; - u32 dma_hw_fault; - u32 dma_cancelled; - u32 sw_q_depth; /* Number of DMAs on the SW queue. */ - atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ -}; - -struct rsxx_dma_ctrl { - struct rsxx_cardinfo *card; - int id; - void __iomem *regmap; - struct rsxx_cs_buffer status; - struct rsxx_cs_buffer cmd; - u16 e_cnt; - spinlock_t queue_lock; - struct list_head queue; - struct workqueue_struct *issue_wq; - struct work_struct issue_dma_work; - struct workqueue_struct *done_wq; - struct work_struct dma_done_work; - struct timer_list activity_timer; - struct dma_tracker_list *trackers; - struct rsxx_dma_stats stats; - struct mutex work_lock; -}; - -struct rsxx_cardinfo { - struct pci_dev *dev; - unsigned int halt; - unsigned int eeh_state; - - void __iomem *regmap; - spinlock_t irq_lock; - unsigned int isr_mask; - unsigned int ier_mask; - - struct rsxx_card_cfg config; - int config_valid; - - /* Embedded CPU Communication */ - struct { - spinlock_t lock; - bool active; - struct creg_cmd *active_cmd; - struct workqueue_struct *creg_wq; - struct work_struct done_work; - struct list_head queue; - unsigned int q_depth; - /* Cache the creg status to prevent ioreads */ - struct { - u32 stat; - u32 failed_cancel_timer; - u32 creg_timeout; - } creg_stats; - struct timer_list cmd_timer; - struct mutex reset_lock; - int reset; - } creg_ctrl; - - struct { - char tmp[MAX_CREG_DATA8]; - char buf[LOG_BUF_SIZE8]; /* terminated */ - int buf_len; - } log; - - struct workqueue_struct *event_wq; - struct work_struct event_work; - unsigned int state; - u64 size8; - - /* Lock the device attach/detach function */ - struct mutex dev_lock; - - /* Block Device Variables */ - bool bdev_attached; - int disk_id; - int major; - struct gendisk *gendisk; - struct { - /* Used to convert a byte address to a device address. */ - u64 lower_mask; - u64 upper_shift; - u64 upper_mask; - u64 target_mask; - u64 target_shift; - } _stripe; - unsigned int dma_fault; - - int scrub_hard; - - int n_targets; - struct rsxx_dma_ctrl *ctrl; - - struct dentry *debugfs_dir; -}; - -enum rsxx_pci_regmap { - HWID = 0x00, /* Hardware Identification Register */ - SCRATCH = 0x04, /* Scratch/Debug Register */ - RESET = 0x08, /* Reset Register */ - ISR = 0x10, /* Interrupt Status Register */ - IER = 0x14, /* Interrupt Enable Register */ - IPR = 0x18, /* Interrupt Poll Register */ - CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ - CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ - HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ - SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ - SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ - SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ - HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ - SW_STATUS_CNT = 0x3C, /* Deprecated */ - CREG_CMD = 0x40, /* CPU Command Register */ - CREG_ADD = 0x44, /* CPU Address Register */ - CREG_CNT = 0x48, /* CPU Count Register */ - CREG_STAT = 0x4C, /* CPU Status Register */ - CREG_DATA0 = 0x50, /* CPU Data Registers */ - CREG_DATA1 = 0x54, - CREG_DATA2 = 0x58, - CREG_DATA3 = 0x5C, - CREG_DATA4 = 0x60, - CREG_DATA5 = 0x64, - CREG_DATA6 = 0x68, - CREG_DATA7 = 0x6c, - INTR_COAL = 0x70, /* Interrupt Coalescing Register */ - HW_ERROR = 0x74, /* Card Error Register */ - PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ - PCI_DEBUG1 = 0x7C, - PCI_DEBUG2 = 0x80, - PCI_DEBUG3 = 0x84, - PCI_DEBUG4 = 0x88, - PCI_DEBUG5 = 0x8C, - PCI_DEBUG6 = 0x90, - PCI_DEBUG7 = 0x94, - PCI_POWER_THROTTLE = 0x98, - PERF_CTRL = 0x9c, - PERF_TIMER_LO = 0xa0, - PERF_TIMER_HI = 0xa4, - PERF_RD512_LO = 0xa8, - PERF_RD512_HI = 0xac, - PERF_WR512_LO = 0xb0, - PERF_WR512_HI = 0xb4, - PCI_RECONFIG = 0xb8, -}; - -enum rsxx_intr { - CR_INTR_DMA0 = 0x00000001, - CR_INTR_CREG = 0x00000002, - CR_INTR_DMA1 = 0x00000004, - CR_INTR_EVENT = 0x00000008, - CR_INTR_DMA2 = 0x00000010, - CR_INTR_DMA3 = 0x00000020, - CR_INTR_DMA4 = 0x00000040, - CR_INTR_DMA5 = 0x00000080, - CR_INTR_DMA6 = 0x00000100, - CR_INTR_DMA7 = 0x00000200, - CR_INTR_ALL_C = 0x0000003f, - CR_INTR_ALL_G = 0x000003ff, - CR_INTR_DMA_ALL = 0x000003f5, - CR_INTR_ALL = 0xffffffff, -}; - -static inline int CR_INTR_DMA(int N) -{ - static const unsigned int _CR_INTR_DMA[] = { - CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, - CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 - }; - return _CR_INTR_DMA[N]; -} -enum rsxx_pci_reset { - DMA_QUEUE_RESET = 0x00000001, -}; - -enum rsxx_hw_fifo_flush { - RSXX_FLUSH_BUSY = 0x00000002, - RSXX_FLUSH_TIMEOUT = 0x00000004, -}; - -enum rsxx_pci_revision { - RSXX_DISCARD_SUPPORT = 2, - RSXX_EEH_SUPPORT = 3, -}; - -enum rsxx_creg_cmd { - CREG_CMD_TAG_MASK = 0x0000FF00, - CREG_OP_WRITE = 0x000000C0, - CREG_OP_READ = 0x000000E0, -}; - -enum rsxx_creg_addr { - CREG_ADD_CARD_CMD = 0x80001000, - CREG_ADD_CARD_STATE = 0x80001004, - CREG_ADD_CARD_SIZE = 0x8000100c, - CREG_ADD_CAPABILITIES = 0x80001050, - CREG_ADD_LOG = 0x80002000, - CREG_ADD_NUM_TARGETS = 0x80003000, - CREG_ADD_CRAM = 0xA0000000, - CREG_ADD_CONFIG = 0xB0000000, -}; - -enum rsxx_creg_card_cmd { - CARD_CMD_STARTUP = 1, - CARD_CMD_SHUTDOWN = 2, - CARD_CMD_LOW_LEVEL_FORMAT = 3, - CARD_CMD_FPGA_RECONFIG_BR = 4, - CARD_CMD_FPGA_RECONFIG_MAIN = 5, - CARD_CMD_BACKUP = 6, - CARD_CMD_RESET = 7, - CARD_CMD_deprecated = 8, - CARD_CMD_UNINITIALIZE = 9, - CARD_CMD_DSTROY_EMERGENCY = 10, - CARD_CMD_DSTROY_NORMAL = 11, - CARD_CMD_DSTROY_EXTENDED = 12, - CARD_CMD_DSTROY_ABORT = 13, -}; - -enum rsxx_card_state { - CARD_STATE_SHUTDOWN = 0x00000001, - CARD_STATE_STARTING = 0x00000002, - CARD_STATE_FORMATTING = 0x00000004, - CARD_STATE_UNINITIALIZED = 0x00000008, - CARD_STATE_GOOD = 0x00000010, - CARD_STATE_SHUTTING_DOWN = 0x00000020, - CARD_STATE_FAULT = 0x00000040, - CARD_STATE_RD_ONLY_FAULT = 0x00000080, - CARD_STATE_DSTROYING = 0x00000100, -}; - -enum rsxx_led { - LED_DEFAULT = 0x0, - LED_IDENTIFY = 0x1, - LED_SOAK = 0x2, -}; - -enum rsxx_creg_flash_lock { - CREG_FLASH_LOCK = 1, - CREG_FLASH_UNLOCK = 2, -}; - -enum rsxx_card_capabilities { - CARD_CAP_SUBPAGE_WRITES = 0x00000080, -}; - -enum rsxx_creg_stat { - CREG_STAT_STATUS_MASK = 0x00000003, - CREG_STAT_SUCCESS = 0x1, - CREG_STAT_ERROR = 0x2, - CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ - CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ - CREG_STAT_TAG_MASK = 0x0000ff00, -}; - -enum rsxx_dma_finish { - FREE_DMA = 0x0, - COMPLETE_DMA = 0x1, -}; - -static inline unsigned int CREG_DATA(int N) -{ - return CREG_DATA0 + (N << 2); -} - -/*----------------- Convenient Log Wrappers -------------------*/ -#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) - -/***** config.c *****/ -int rsxx_load_config(struct rsxx_cardinfo *card); - -/***** core.c *****/ -void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); -void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); -void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, - unsigned int intr); -void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, - unsigned int intr); - -/***** dev.c *****/ -int rsxx_attach_dev(struct rsxx_cardinfo *card); -void rsxx_detach_dev(struct rsxx_cardinfo *card); -int rsxx_setup_dev(struct rsxx_cardinfo *card); -void rsxx_destroy_dev(struct rsxx_cardinfo *card); -int rsxx_dev_init(void); -void rsxx_dev_cleanup(void); - -/***** dma.c ****/ -typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, - void *cb_data, - unsigned int status); -int rsxx_dma_setup(struct rsxx_cardinfo *card); -void rsxx_dma_destroy(struct rsxx_cardinfo *card); -int rsxx_dma_init(void); -int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, - struct list_head *q, - unsigned int done); -int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl); -void rsxx_dma_cleanup(void); -void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); -int rsxx_dma_configure(struct rsxx_cardinfo *card); -blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card, - struct bio *bio, - atomic_t *n_dmas, - rsxx_dma_cb cb, - void *cb_data); -int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); -int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); -int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); - -/***** cregs.c *****/ -int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, - unsigned int size8, - void *data, - int byte_stream); -int rsxx_creg_read(struct rsxx_cardinfo *card, - u32 addr, - unsigned int size8, - void *data, - int byte_stream); -int rsxx_read_hw_log(struct rsxx_cardinfo *card); -int rsxx_get_card_state(struct rsxx_cardinfo *card, - unsigned int *state); -int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); -int rsxx_get_num_targets(struct rsxx_cardinfo *card, - unsigned int *n_targets); -int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, - u32 *capabilities); -int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); -int rsxx_creg_setup(struct rsxx_cardinfo *card); -void rsxx_creg_destroy(struct rsxx_cardinfo *card); -int rsxx_creg_init(void); -void rsxx_creg_cleanup(void); -int rsxx_reg_access(struct rsxx_cardinfo *card, - struct rsxx_reg_access __user *ucmd, - int read); -void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card); -void rsxx_kick_creg_queue(struct rsxx_cardinfo *card); - - - -#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 7fbd41e156c9..1c8a06b77c85 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -574,7 +574,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) int ret = 0; if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot), - "node %d received it's own msg\n", le32_to_cpu(msg->slot))) + "node %d received its own msg\n", le32_to_cpu(msg->slot))) return -1; switch (le32_to_cpu(msg->type)) { case METADATA_UPDATED: diff --git a/drivers/md/md.c b/drivers/md/md.c index 55f9d9caff31..5881d05a76eb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -418,6 +418,12 @@ check_suspended: rcu_read_lock(); if (is_suspended(mddev, bio)) { DEFINE_WAIT(__wait); + /* Bail out if REQ_NOWAIT is set for the bio */ + if (bio->bi_opf & REQ_NOWAIT) { + rcu_read_unlock(); + bio_wouldblock_error(bio); + return; + } for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); @@ -3603,6 +3609,7 @@ static struct attribute *rdev_default_attrs[] = { &rdev_ppl_size.attr, NULL, }; +ATTRIBUTE_GROUPS(rdev_default); static ssize_t rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { @@ -3652,7 +3659,7 @@ static const struct sysfs_ops rdev_sysfs_ops = { static struct kobj_type rdev_ktype = { .release = rdev_free, .sysfs_ops = &rdev_sysfs_ops, - .default_attrs = rdev_default_attrs, + .default_groups = rdev_default_groups, }; int md_rdev_init(struct md_rdev *rdev) @@ -5788,6 +5795,7 @@ int md_run(struct mddev *mddev) int err; struct md_rdev *rdev; struct md_personality *pers; + bool nowait = true; if (list_empty(&mddev->disks)) /* cannot run an array with no devices.. */ @@ -5858,8 +5866,13 @@ int md_run(struct mddev *mddev) } } sysfs_notify_dirent_safe(rdev->sysfs_state); + nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev)); } + /* Set the NOWAIT flags if all underlying devices support it */ + if (nowait) + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue); + if (!bioset_initialized(&mddev->bio_set)) { err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) @@ -5870,13 +5883,6 @@ int md_run(struct mddev *mddev) if (err) goto exit_bio_set; } - if (mddev->level != 1 && mddev->level != 10 && - !bioset_initialized(&mddev->io_acct_set)) { - err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, - offsetof(struct md_io_acct, bio_clone), 0); - if (err) - goto exit_sync_set; - } spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -6053,9 +6059,6 @@ bitmap_abort: module_put(pers->owner); md_bitmap_destroy(mddev); abort: - if (mddev->level != 1 && mddev->level != 10) - bioset_exit(&mddev->io_acct_set); -exit_sync_set: bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); @@ -7005,6 +7008,15 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) if (!mddev->thread) md_update_sb(mddev, 1); /* + * If the new disk does not support REQ_NOWAIT, + * disable on the whole MD. + */ + if (!blk_queue_nowait(bdev_get_queue(rdev->bdev))) { + pr_info("%s: Disabling nowait because %s does not support nowait\n", + mdname(mddev), bdevname(rdev->bdev, b)); + blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue); + } + /* * Kick recovery, maybe this spare has to be added to the * array immediately. */ @@ -8402,7 +8414,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes) spin_lock(&pers_lock); /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { - pr_warn("can't find md-cluster module or get it's reference.\n"); + pr_warn("can't find md-cluster module or get its reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } @@ -8589,6 +8601,23 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, } EXPORT_SYMBOL_GPL(md_submit_discard_bio); +int acct_bioset_init(struct mddev *mddev) +{ + int err = 0; + + if (!bioset_initialized(&mddev->io_acct_set)) + err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, + offsetof(struct md_io_acct, bio_clone), 0); + return err; +} +EXPORT_SYMBOL_GPL(acct_bioset_init); + +void acct_bioset_exit(struct mddev *mddev) +{ + bioset_exit(&mddev->io_acct_set); +} +EXPORT_SYMBOL_GPL(acct_bioset_exit); + static void md_end_io_acct(struct bio *bio) { struct md_io_acct *md_io_acct = bio->bi_private; diff --git a/drivers/md/md.h b/drivers/md/md.h index 53ea7a6961de..f1bf3625ef4c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -721,6 +721,8 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); +int acct_bioset_init(struct mddev *mddev); +void acct_bioset_exit(struct mddev *mddev); void md_account_bio(struct mddev *mddev, struct bio **bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 62c8b6adac70..b59a77b31b90 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -356,7 +356,21 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks return array_sectors; } -static void raid0_free(struct mddev *mddev, void *priv); +static void free_conf(struct mddev *mddev, struct r0conf *conf) +{ + kfree(conf->strip_zone); + kfree(conf->devlist); + kfree(conf); + mddev->private = NULL; +} + +static void raid0_free(struct mddev *mddev, void *priv) +{ + struct r0conf *conf = priv; + + free_conf(mddev, conf); + acct_bioset_exit(mddev); +} static int raid0_run(struct mddev *mddev) { @@ -370,11 +384,16 @@ static int raid0_run(struct mddev *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; + if (acct_bioset_init(mddev)) { + pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev)); + return -ENOMEM; + } + /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { ret = create_strip_zones(mddev, &conf); if (ret < 0) - return ret; + goto exit_acct_set; mddev->private = conf; } conf = mddev->private; @@ -413,17 +432,16 @@ static int raid0_run(struct mddev *mddev) dump_zones(mddev); ret = md_integrity_register(mddev); + if (ret) + goto free; return ret; -} -static void raid0_free(struct mddev *mddev, void *priv) -{ - struct r0conf *conf = priv; - - kfree(conf->strip_zone); - kfree(conf->devlist); - kfree(conf); +free: + free_conf(mddev, conf); +exit_acct_set: + acct_bioset_exit(mddev); + return ret; } static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 54db34163968..83f9a4f3d82e 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -22,12 +22,6 @@ #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) -/* When there are this many requests queue to be written by - * the raid thread, we become 'congested' to provide back-pressure - * for writeback. - */ -static int max_queued_requests = 1024; - /* for managing resync I/O pages */ struct resync_pages { void *raid_bio; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 85505424f7a4..e2d8acb1e988 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -929,8 +929,10 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr) wake_up(&conf->wait_barrier); } -static void _wait_barrier(struct r1conf *conf, int idx) +static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait) { + bool ret = true; + /* * We need to increase conf->nr_pending[idx] very early here, * then raise_barrier() can be blocked when it waits for @@ -961,7 +963,7 @@ static void _wait_barrier(struct r1conf *conf, int idx) */ if (!READ_ONCE(conf->array_frozen) && !atomic_read(&conf->barrier[idx])) - return; + return ret; /* * After holding conf->resync_lock, conf->nr_pending[idx] @@ -979,18 +981,27 @@ static void _wait_barrier(struct r1conf *conf, int idx) */ wake_up(&conf->wait_barrier); /* Wait for the barrier in same barrier unit bucket to drop. */ - wait_event_lock_irq(conf->wait_barrier, - !conf->array_frozen && - !atomic_read(&conf->barrier[idx]), - conf->resync_lock); - atomic_inc(&conf->nr_pending[idx]); + + /* Return false when nowait flag is set */ + if (nowait) { + ret = false; + } else { + wait_event_lock_irq(conf->wait_barrier, + !conf->array_frozen && + !atomic_read(&conf->barrier[idx]), + conf->resync_lock); + atomic_inc(&conf->nr_pending[idx]); + } + atomic_dec(&conf->nr_waiting[idx]); spin_unlock_irq(&conf->resync_lock); + return ret; } -static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr) +static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait) { int idx = sector_to_idx(sector_nr); + bool ret = true; /* * Very similar to _wait_barrier(). The difference is, for read @@ -1002,7 +1013,7 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr) atomic_inc(&conf->nr_pending[idx]); if (!READ_ONCE(conf->array_frozen)) - return; + return ret; spin_lock_irq(&conf->resync_lock); atomic_inc(&conf->nr_waiting[idx]); @@ -1013,19 +1024,28 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr) */ wake_up(&conf->wait_barrier); /* Wait for array to be unfrozen */ - wait_event_lock_irq(conf->wait_barrier, - !conf->array_frozen, - conf->resync_lock); - atomic_inc(&conf->nr_pending[idx]); + + /* Return false when nowait flag is set */ + if (nowait) { + /* Return false when nowait flag is set */ + ret = false; + } else { + wait_event_lock_irq(conf->wait_barrier, + !conf->array_frozen, + conf->resync_lock); + atomic_inc(&conf->nr_pending[idx]); + } + atomic_dec(&conf->nr_waiting[idx]); spin_unlock_irq(&conf->resync_lock); + return ret; } -static void wait_barrier(struct r1conf *conf, sector_t sector_nr) +static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait) { int idx = sector_to_idx(sector_nr); - _wait_barrier(conf, idx); + return _wait_barrier(conf, idx, nowait); } static void _allow_barrier(struct r1conf *conf, int idx) @@ -1236,7 +1256,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, * Still need barrier for READ in case that whole * array is frozen. */ - wait_read_barrier(conf, bio->bi_iter.bi_sector); + if (!wait_read_barrier(conf, bio->bi_iter.bi_sector, + bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return; + } if (!r1_bio) r1_bio = alloc_r1bio(mddev, bio); @@ -1336,6 +1360,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, bio->bi_iter.bi_sector, bio_end_sector(bio))) { DEFINE_WAIT(w); + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } for (;;) { prepare_to_wait(&conf->wait_barrier, &w, TASK_IDLE); @@ -1353,17 +1381,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. */ - wait_barrier(conf, bio->bi_iter.bi_sector); + if (!wait_barrier(conf, bio->bi_iter.bi_sector, + bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return; + } r1_bio = alloc_r1bio(mddev, bio); r1_bio->sectors = max_write_sectors; - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - raid1_log(mddev, "wait queued"); - wait_event(conf->wait_barrier, - conf->pending_count < max_queued_requests); - } /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio @@ -1458,9 +1484,14 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, rdev_dec_pending(conf->mirrors[j].rdev, mddev); r1_bio->state = 0; allow_barrier(conf, bio->bi_iter.bi_sector); + + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf, bio->bi_iter.bi_sector); + wait_barrier(conf, bio->bi_iter.bi_sector, false); goto retry_write; } @@ -1688,7 +1719,7 @@ static void close_sync(struct r1conf *conf) int idx; for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++) { - _wait_barrier(conf, idx); + _wait_barrier(conf, idx, false); _allow_barrier(conf, idx); } @@ -3410,5 +3441,3 @@ MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ MODULE_ALIAS("md-raid1"); MODULE_ALIAS("md-level-1"); - -module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index dde98f65bd04..2b969f70a31f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -952,8 +952,10 @@ static void lower_barrier(struct r10conf *conf) wake_up(&conf->wait_barrier); } -static void wait_barrier(struct r10conf *conf) +static bool wait_barrier(struct r10conf *conf, bool nowait) { + bool ret = true; + spin_lock_irq(&conf->resync_lock); if (conf->barrier) { struct bio_list *bio_list = current->bio_list; @@ -967,27 +969,35 @@ static void wait_barrier(struct r10conf *conf) * that queue to get the nr_pending * count down. */ - raid10_log(conf->mddev, "wait barrier"); - wait_event_lock_irq(conf->wait_barrier, - !conf->barrier || - (atomic_read(&conf->nr_pending) && - bio_list && - (!bio_list_empty(&bio_list[0]) || - !bio_list_empty(&bio_list[1]))) || - /* move on if recovery thread is - * blocked by us - */ - (conf->mddev->thread->tsk == current && - test_bit(MD_RECOVERY_RUNNING, - &conf->mddev->recovery) && - conf->nr_queued > 0), - conf->resync_lock); + /* Return false when nowait flag is set */ + if (nowait) { + ret = false; + } else { + raid10_log(conf->mddev, "wait barrier"); + wait_event_lock_irq(conf->wait_barrier, + !conf->barrier || + (atomic_read(&conf->nr_pending) && + bio_list && + (!bio_list_empty(&bio_list[0]) || + !bio_list_empty(&bio_list[1]))) || + /* move on if recovery thread is + * blocked by us + */ + (conf->mddev->thread->tsk == current && + test_bit(MD_RECOVERY_RUNNING, + &conf->mddev->recovery) && + conf->nr_queued > 0), + conf->resync_lock); + } conf->nr_waiting--; if (!conf->nr_waiting) wake_up(&conf->wait_barrier); } - atomic_inc(&conf->nr_pending); + /* Only increment nr_pending when we wait */ + if (ret) + atomic_inc(&conf->nr_pending); spin_unlock_irq(&conf->resync_lock); + return ret; } static void allow_barrier(struct r10conf *conf) @@ -1098,21 +1108,30 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) * currently. * 2. If IO spans the reshape position. Need to wait for reshape to pass. */ -static void regular_request_wait(struct mddev *mddev, struct r10conf *conf, +static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, struct bio *bio, sector_t sectors) { - wait_barrier(conf); + /* Bail out if REQ_NOWAIT is set for the bio */ + if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return false; + } while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_iter.bi_sector < conf->reshape_progress && bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { - raid10_log(conf->mddev, "wait reshape"); allow_barrier(conf); + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return false; + } + raid10_log(conf->mddev, "wait reshape"); wait_event(conf->wait_barrier, conf->reshape_progress <= bio->bi_iter.bi_sector || conf->reshape_progress >= bio->bi_iter.bi_sector + sectors); - wait_barrier(conf); + wait_barrier(conf, false); } + return true; } static void raid10_read_request(struct mddev *mddev, struct bio *bio, @@ -1157,7 +1176,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, rcu_read_unlock(); } - regular_request_wait(mddev, conf, bio, r10_bio->sectors); + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) + return; rdev = read_balance(conf, r10_bio, &max_sectors); if (!rdev) { if (err_rdev) { @@ -1179,7 +1199,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); - wait_barrier(conf); + wait_barrier(conf, false); bio = split; r10_bio->master_bio = bio; r10_bio->sectors = max_sectors; @@ -1338,7 +1358,7 @@ retry_wait: raid10_log(conf->mddev, "%s wait rdev %d blocked", __func__, blocked_rdev->raid_disk); md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); + wait_barrier(conf, false); goto retry_wait; } } @@ -1356,6 +1376,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio->bi_iter.bi_sector, bio_end_sector(bio)))) { DEFINE_WAIT(w); + /* Bail out if REQ_NOWAIT is set for the bio */ + if (bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } for (;;) { prepare_to_wait(&conf->wait_barrier, &w, TASK_IDLE); @@ -1368,7 +1393,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } sectors = r10_bio->sectors; - regular_request_wait(mddev, conf, bio, sectors); + if (!regular_request_wait(mddev, conf, bio, sectors)) + return; if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && (mddev->reshape_backwards ? (bio->bi_iter.bi_sector < conf->reshape_safe && @@ -1380,6 +1406,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); md_wakeup_thread(mddev->thread); + if (bio->bi_opf & REQ_NOWAIT) { + allow_barrier(conf); + bio_wouldblock_error(bio); + return; + } raid10_log(conf->mddev, "wait reshape metadata"); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); @@ -1387,12 +1418,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, conf->reshape_safe = mddev->reshape_position; } - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - raid10_log(mddev, "wait queued"); - wait_event(conf->wait_barrier, - conf->pending_count < max_queued_requests); - } /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio @@ -1482,7 +1507,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); - wait_barrier(conf); + wait_barrier(conf, false); bio = split; r10_bio->master_bio = bio; } @@ -1607,7 +1632,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) return -EAGAIN; - wait_barrier(conf); + if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) { + bio_wouldblock_error(bio); + return 0; + } + wait_barrier(conf, false); /* * Check reshape again to avoid reshape happens after checking @@ -1649,7 +1678,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) allow_barrier(conf); /* Resend the fist split part */ submit_bio_noacct(split); - wait_barrier(conf); + wait_barrier(conf, false); } div_u64_rem(bio_end, stripe_size, &remainder); if (remainder) { @@ -1660,7 +1689,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) /* Resend the second split part */ submit_bio_noacct(bio); bio = split; - wait_barrier(conf); + wait_barrier(conf, false); } bio_start = bio->bi_iter.bi_sector; @@ -1816,7 +1845,7 @@ retry_discard: end_disk_offset += geo->stride; atomic_inc(&first_r10bio->remaining); raid_end_discard_bio(r10_bio); - wait_barrier(conf); + wait_barrier(conf, false); goto retry_discard; } @@ -2011,7 +2040,7 @@ static void print_conf(struct r10conf *conf) static void close_sync(struct r10conf *conf) { - wait_barrier(conf); + wait_barrier(conf, false); allow_barrier(conf); mempool_exit(&conf->r10buf_pool); @@ -4819,7 +4848,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, if (need_flush || time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { /* Need to update reshape_position in metadata */ - wait_barrier(conf); + wait_barrier(conf, false); mddev->reshape_position = conf->reshape_progress; if (mddev->reshape_backwards) mddev->curr_resync_completed = raid10_size(mddev, 0, 0) @@ -5242,5 +5271,3 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-level-10"); - -module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c1a5877cf9f..ffe720c73b0a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2215,10 +2215,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) struct r5conf *conf = sh->raid_conf; int level = conf->level; struct raid5_percpu *percpu; - unsigned long cpu; - cpu = get_cpu(); - percpu = per_cpu_ptr(conf->percpu, cpu); + local_lock(&conf->percpu->lock); + percpu = this_cpu_ptr(conf->percpu); if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; @@ -2271,13 +2270,14 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) BUG(); } - if (overlap_clear && !sh->batch_head) + if (overlap_clear && !sh->batch_head) { for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } - put_cpu(); + } + local_unlock(&conf->percpu->lock); } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) @@ -5686,6 +5686,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) struct stripe_head *sh; int stripe_sectors; + /* We need to handle this when io_uring supports discard/trim */ + if (WARN_ON_ONCE(bi->bi_opf & REQ_NOWAIT)) + return; + if (mddev->reshape_position != MaxSector) /* Skip discard while reshape is happening */ return; @@ -5819,6 +5823,17 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) last_sector = bio_end_sector(bi); bi->bi_next = NULL; + /* Bail out if conflicts with reshape and REQ_NOWAIT is set */ + if ((bi->bi_opf & REQ_NOWAIT) && + (conf->reshape_progress != MaxSector) && + (mddev->reshape_backwards + ? (logical_sector > conf->reshape_progress && logical_sector <= conf->reshape_safe) + : (logical_sector >= conf->reshape_safe && logical_sector < conf->reshape_progress))) { + bio_wouldblock_error(bi); + if (rw == WRITE) + md_write_end(mddev); + return true; + } md_account_bio(mddev, &bi); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { @@ -7052,6 +7067,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu return -ENOMEM; } + local_lock_init(&percpu->lock); return 0; } @@ -7446,12 +7462,19 @@ static int raid5_run(struct mddev *mddev) struct md_rdev *rdev; struct md_rdev *journal_dev = NULL; sector_t reshape_offset = 0; - int i; + int i, ret = 0; long long min_offset_diff = 0; int first = 1; - if (mddev_init_writes_pending(mddev) < 0) + if (acct_bioset_init(mddev)) { + pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev)); return -ENOMEM; + } + + if (mddev_init_writes_pending(mddev) < 0) { + ret = -ENOMEM; + goto exit_acct_set; + } if (mddev->recovery_cp != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", @@ -7482,7 +7505,8 @@ static int raid5_run(struct mddev *mddev) (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } if (mddev->reshape_position != MaxSector) { @@ -7507,13 +7531,15 @@ static int raid5_run(struct mddev *mddev) if (journal_dev) { pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } if (mddev->new_level != mddev->level) { pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } old_disks = mddev->raid_disks - mddev->delta_disks; /* reshape_position must be on a new-stripe boundary, and one @@ -7529,7 +7555,8 @@ static int raid5_run(struct mddev *mddev) if (sector_div(here_new, chunk_sectors * new_data_disks)) { pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } reshape_offset = here_new * chunk_sectors; /* here_new is the stripe we will write to */ @@ -7551,7 +7578,8 @@ static int raid5_run(struct mddev *mddev) else if (mddev->ro == 0) { pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } } else if (mddev->reshape_backwards ? (here_new * chunk_sectors + min_offset_diff <= @@ -7561,7 +7589,8 @@ static int raid5_run(struct mddev *mddev) /* Reading from the same stripe as writing to - bad */ pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", mdname(mddev)); - return -EINVAL; + ret = -EINVAL; + goto exit_acct_set; } pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev)); /* OK, we should be able to continue; */ @@ -7585,8 +7614,10 @@ static int raid5_run(struct mddev *mddev) else conf = mddev->private; - if (IS_ERR(conf)) - return PTR_ERR(conf); + if (IS_ERR(conf)) { + ret = PTR_ERR(conf); + goto exit_acct_set; + } if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { if (!journal_dev) { @@ -7783,7 +7814,10 @@ abort: free_conf(conf); mddev->private = NULL; pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); - return -EIO; + ret = -EIO; +exit_acct_set: + acct_bioset_exit(mddev); + return ret; } static void raid5_free(struct mddev *mddev, void *priv) @@ -7791,6 +7825,7 @@ static void raid5_free(struct mddev *mddev, void *priv) struct r5conf *conf = priv; free_conf(conf); + acct_bioset_exit(mddev); mddev->to_remove = &raid5_attrs_group; } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 5c05acf20e1f..9e8486a9e445 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -4,6 +4,7 @@ #include <linux/raid/xor.h> #include <linux/dmaengine.h> +#include <linux/local_lock.h> /* * @@ -640,7 +641,8 @@ struct r5conf { * lists and performing address * conversions */ - int scribble_obj_size; + int scribble_obj_size; + local_lock_t lock; } __percpu *percpu; int scribble_disks; int scribble_sectors; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 290f26ed74c2..5e0bfda04bd7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -991,7 +991,6 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) { struct nvme_command *cmd = nvme_req(req)->cmd; - struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; if (!(req->rq_flags & RQF_DONTPREP)) @@ -1038,8 +1037,6 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) return BLK_STS_IOERR; } - if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN)) - nvme_req(req)->genctr++; cmd->common.command_id = nvme_cid(req); trace_nvme_setup_cmd(req, cmd); return ret; @@ -2762,9 +2759,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return -EINVAL; } subsys->awupf = le16_to_cpu(id->awupf); -#ifdef CONFIG_NVME_MULTIPATH - subsys->iopolicy = NVME_IOPOLICY_NUMA; -#endif + nvme_mpath_default_iopolicy(subsys); subsys->dev.class = nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 282d54117e0a..7ae041e2b3fb 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1069,6 +1069,26 @@ out_unlock: return ret ? ret : count; } +static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) +{ + const struct match_token *tok; + int idx; + + /* + * Add dummy entries for instance and cntlid to + * signal an invalid/non-existing controller + */ + seq_puts(seq_file, "instance=-1,cntlid=-1"); + for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) { + tok = &opt_tokens[idx]; + if (tok->token == NVMF_OPT_ERR) + continue; + seq_puts(seq_file, ","); + seq_puts(seq_file, tok->pattern); + } + seq_puts(seq_file, "\n"); +} + static int nvmf_dev_show(struct seq_file *seq_file, void *private) { struct nvme_ctrl *ctrl; @@ -1077,7 +1097,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private) mutex_lock(&nvmf_dev_mutex); ctrl = seq_file->private; if (!ctrl) { - ret = -EINVAL; + __nvmf_concat_opt_tokens(seq_file); goto out_unlock; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 13e5d503ed07..f8bf6606eb2f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -13,6 +13,42 @@ module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +static const char *nvme_iopolicy_names[] = { + [NVME_IOPOLICY_NUMA] = "numa", + [NVME_IOPOLICY_RR] = "round-robin", +}; + +static int iopolicy = NVME_IOPOLICY_NUMA; + +static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp) +{ + if (!val) + return -EINVAL; + if (!strncmp(val, "numa", 4)) + iopolicy = NVME_IOPOLICY_NUMA; + else if (!strncmp(val, "round-robin", 11)) + iopolicy = NVME_IOPOLICY_RR; + else + return -EINVAL; + + return 0; +} + +static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp) +{ + return sprintf(buf, "%s\n", nvme_iopolicy_names[iopolicy]); +} + +module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy, + &iopolicy, 0644); +MODULE_PARM_DESC(iopolicy, + "Default multipath I/O policy; 'numa' (default) or 'round-robin'"); + +void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys) +{ + subsys->iopolicy = iopolicy; +} + void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) { struct nvme_ns_head *h; @@ -706,11 +742,6 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl) struct device_attribute subsys_attr_##_name = \ __ATTR(_name, _mode, _show, _store) -static const char *nvme_iopolicy_names[] = { - [NVME_IOPOLICY_NUMA] = "numa", - [NVME_IOPOLICY_RR] = "round-robin", -}; - static ssize_t nvme_subsys_iopolicy_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9b095ee01364..a162f6c6da6e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -614,6 +614,10 @@ static inline bool nvme_try_complete_req(struct request *req, __le16 status, union nvme_result result) { struct nvme_request *rq = nvme_req(req); + struct nvme_ctrl *ctrl = rq->ctrl; + + if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN)) + rq->genctr++; rq->status = le16_to_cpu(status) >> 1; rq->result = result; @@ -763,6 +767,7 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); +void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); @@ -860,6 +865,9 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) { } +static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys) +{ +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_revalidate_zones(struct nvme_ns *ns); diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 174601554b06..f9c5ac80d59b 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -152,14 +152,6 @@ struct packet_stacked_data }; #define PSD_POOL_SIZE 64 -struct pktcdvd_kobj -{ - struct kobject kobj; - struct pktcdvd_device *pd; -}; -#define to_pktcdvdkobj(_k) \ - ((struct pktcdvd_kobj*)container_of(_k,struct pktcdvd_kobj,kobj)) - struct pktcdvd_device { struct block_device *bdev; /* dev attached */ @@ -183,6 +175,8 @@ struct pktcdvd_device spinlock_t lock; /* Serialize access to bio_queue */ struct rb_root bio_queue; /* Work queue of bios we need to handle */ int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ sector_t current_sector; /* Keep track of where the elevator is */ atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ /* needs to be run. */ @@ -195,8 +189,6 @@ struct pktcdvd_device int write_congestion_on; struct device *dev; /* sysfs pktcdvd[0-7] dev */ - struct pktcdvd_kobj *kobj_stat; /* sysfs pktcdvd[0-7]/stat/ */ - struct pktcdvd_kobj *kobj_wqueue; /* sysfs pktcdvd[0-7]/write_queue/ */ struct dentry *dfs_d_root; /* debugfs: devname directory */ struct dentry *dfs_f_info; /* debugfs: info file */ diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 154e954b711d..d6e5a1feb947 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -81,7 +81,7 @@ struct raid6_calls { void (*xor_syndrome)(int, int, int, size_t, void **); int (*valid)(void); /* Returns 1 if this routine set is usable */ const char *name; /* Name of this routine set */ - int prefer; /* Has special performance attribute */ + int priority; /* Relative priority ranking if non-zero */ }; /* Selected algorithm */ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d5e5000fdd7..39b74221f4a7 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -145,13 +145,13 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, bestxorperf, j0, j1; + unsigned long perf, bestgenperf, j0, j1; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { - if (!best || (*algo)->prefer >= best->prefer) { + for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + if (!best || (*algo)->priority >= best->priority) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -180,50 +180,48 @@ static inline const struct raid6_calls *raid6_choose_gen( pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf * HZ * (disks-2)) >> (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + } + } - if (!(*algo)->xor_syndrome) - continue; + if (!best) { + pr_err("raid6: Yikes! No algorithm found!\n"); + goto out; + } - perf = 0; + raid6_call = *best; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { - (*algo)->xor_syndrome(disks, start, stop, - PAGE_SIZE, *dptrs); - perf++; - } - preempt_enable(); - - if (best == *algo) - bestxorperf = perf; + if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + pr_info("raid6: skipped pq benchmark and selected %s\n", + best->name); + goto out; + } - pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, - (perf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", + best->name, + (bestgenperf * HZ * (disks - 2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + + if (best->xor_syndrome) { + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { + best->xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; } - } + preempt_enable(); - if (best) { - if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2)); - if (best->xor_syndrome) - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (bestxorperf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); - } else - pr_info("raid6: skip pq benchmark and using algorithm %s\n", - best->name); - raid6_call = *best; - } else - pr_err("raid6: Yikes! No algorithm found!\n"); + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (perf * HZ * (disks - 2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + } +out: return best; } diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index f299476e1d76..059024234dce 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -132,7 +132,7 @@ const struct raid6_calls raid6_avx2x1 = { raid6_avx21_xor_syndrome, raid6_have_avx2, "avx2x1", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ }; /* @@ -262,7 +262,7 @@ const struct raid6_calls raid6_avx2x2 = { raid6_avx22_xor_syndrome, raid6_have_avx2, "avx2x2", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ }; #ifdef CONFIG_X86_64 @@ -465,6 +465,6 @@ const struct raid6_calls raid6_avx2x4 = { raid6_avx24_xor_syndrome, raid6_have_avx2, "avx2x4", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX2 over priority 1 (SSE2 and others) */ }; -#endif +#endif /* CONFIG_X86_64 */ diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c index bb684d144ee2..9c3e822e1adf 100644 --- a/lib/raid6/avx512.c +++ b/lib/raid6/avx512.c @@ -162,7 +162,7 @@ const struct raid6_calls raid6_avx512x1 = { raid6_avx5121_xor_syndrome, raid6_have_avx512, "avx512x1", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ }; /* @@ -319,7 +319,7 @@ const struct raid6_calls raid6_avx512x2 = { raid6_avx5122_xor_syndrome, raid6_have_avx512, "avx512x2", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ }; #ifdef CONFIG_X86_64 @@ -557,7 +557,7 @@ const struct raid6_calls raid6_avx512x4 = { raid6_avx5124_xor_syndrome, raid6_have_avx512, "avx512x4", - 1 /* Has cache hints */ + .priority = 2 /* Prefer AVX512 over priority 1 (SSE2 and others) */ }; #endif |