From c6517b7942fad663cc1cf3235cbe4207cf769332 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Wed, 21 Jan 2009 14:45:50 -0500 Subject: [SCSI] sg: fix races during device removal sg has the following problems related to device removal: * opening a sg fd races with removing a device * closing a sg fd races with removing a device * /proc/scsi/sg/* access races with removing a device * command completion races with removing a device * command completion races with closing a sg fd * can rmmod sg with active commands These problems can cause kernel oopses, memory-use-after-free, or double-free errors. This patch fixes these problems by using krefs to manage the lifetime of sg_device and sg_fd. Each command submitted to the midlevel holds a reference to sg_fd until the completion callback. This ensures that sg_fd doesn't go away if the fd is closed with commands still outstanding. sg_fd gets the reference of sg_device (with scsi_device) and also makes sure that the sg module doesn't go away. /proc/scsi/sg/* functions don't play nicely with krefs because they give information about sg_fds which have been closed but not yet freed due to still having outstanding commands and sg_devices which have been removed but not yet freed due to still being referenced by one or more sg_fds. To deal with this safely without removing functionality, /proc functions now access sg_device and sg_fd while holding a lock instead of using kref_get()/kref_put(). Signed-off-by: Tony Battersby Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 418 ++++++++++++++++++++++++++---------------------------- 1 file changed, 201 insertions(+), 217 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 516925d8b570..b447527555a7 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -101,6 +101,7 @@ static int scatter_elem_sz_prev = SG_SCATTER_SZ; #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) static int sg_add(struct device *, struct class_interface *); +static void sg_device_destroy(struct kref *kref); static void sg_remove(struct device *, struct class_interface *); static DEFINE_IDR(sg_index_idr); @@ -158,6 +159,8 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ char next_cmd_len; /* 0 -> automatic (def), >0 -> use on next write() */ char keep_orphan; /* 0 -> drop orphan (def), 1 -> keep for read() */ char mmap_called; /* 0 -> mmap() never called on this fd */ + struct kref f_ref; + struct execute_work ew; } Sg_fd; typedef struct sg_device { /* holds the state of each scsi generic device */ @@ -171,6 +174,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ struct gendisk *disk; struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg] */ + struct kref d_ref; } Sg_device; static int sg_fasync(int fd, struct file *filp, int mode); @@ -194,13 +198,14 @@ static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); -static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); -static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); +static void sg_remove_sfp(struct kref *); static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static int sg_res_in_use(Sg_fd * sfp); +static Sg_device *sg_lookup_dev(int dev); static Sg_device *sg_get_dev(int dev); +static void sg_put_dev(Sg_device *sdp); #ifdef CONFIG_SCSI_PROC_FS static int sg_last_dev(void); #endif @@ -237,22 +242,17 @@ sg_open(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); SCSI_LOG_TIMEOUT(3, printk("sg_open: dev=%d, flags=0x%x\n", dev, flags)); sdp = sg_get_dev(dev); - if ((!sdp) || (!sdp->device)) { - unlock_kernel(); - return -ENXIO; - } - if (sdp->detached) { - unlock_kernel(); - return -ENODEV; + if (IS_ERR(sdp)) { + retval = PTR_ERR(sdp); + sdp = NULL; + goto sg_put; } /* This driver's module count bumped by fops_get in */ /* Prevent the device driver from vanishing while we sleep */ retval = scsi_device_get(sdp->device); - if (retval) { - unlock_kernel(); - return retval; - } + if (retval) + goto sg_put; if (!((flags & O_NONBLOCK) || scsi_block_when_processing_errors(sdp->device))) { @@ -303,16 +303,20 @@ sg_open(struct inode *inode, struct file *filp) if ((sfp = sg_add_sfp(sdp, dev))) filp->private_data = sfp; else { - if (flags & O_EXCL) + if (flags & O_EXCL) { sdp->exclude = 0; /* undo if error */ + wake_up_interruptible(&sdp->o_excl_wait); + } retval = -ENOMEM; goto error_out; } - unlock_kernel(); - return 0; - - error_out: - scsi_device_put(sdp->device); + retval = 0; +error_out: + if (retval) + scsi_device_put(sdp->device); +sg_put: + if (sdp) + sg_put_dev(sdp); unlock_kernel(); return retval; } @@ -327,13 +331,13 @@ sg_release(struct inode *inode, struct file *filp) if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); - if (0 == sg_remove_sfp(sdp, sfp)) { /* Returns 1 when sdp gone */ - if (!sdp->detached) { - scsi_device_put(sdp->device); - } - sdp->exclude = 0; - wake_up_interruptible(&sdp->o_excl_wait); - } + + sfp->closed = 1; + + sdp->exclude = 0; + wake_up_interruptible(&sdp->o_excl_wait); + + kref_put(&sfp->f_ref, sg_remove_sfp); return 0; } @@ -755,6 +759,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, hp->duration = jiffies_to_msecs(jiffies); srp->rq->timeout = timeout; + kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */ blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, srp->rq, 1, sg_rq_end_io); return 0; @@ -1247,24 +1252,23 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) static void sg_rq_end_io(struct request *rq, int uptodate) { struct sg_request *srp = rq->end_io_data; - Sg_device *sdp = NULL; + Sg_device *sdp; Sg_fd *sfp; unsigned long iflags; unsigned int ms; char *sense; - int result, resid; + int result, resid, done = 1; - if (NULL == srp) { - printk(KERN_ERR "sg_cmd_done: NULL request\n"); + if (WARN_ON(srp->done != 0)) return; - } + sfp = srp->parentfp; - if (sfp) - sdp = sfp->parentdp; - if ((NULL == sdp) || sdp->detached) { - printk(KERN_INFO "sg_cmd_done: device detached\n"); + if (WARN_ON(sfp == NULL)) return; - } + + sdp = sfp->parentdp; + if (unlikely(sdp->detached)) + printk(KERN_INFO "sg_rq_end_io: device detached\n"); sense = rq->sense; result = rq->errors; @@ -1303,33 +1307,26 @@ static void sg_rq_end_io(struct request *rq, int uptodate) } /* Rely on write phase to clean out srp status values, so no "else" */ - if (sfp->closed) { /* whoops this fd already released, cleanup */ - SCSI_LOG_TIMEOUT(1, printk("sg_cmd_done: already closed, freeing ...\n")); - sg_finish_rem_req(srp); - srp = NULL; - if (NULL == sfp->headrp) { - SCSI_LOG_TIMEOUT(1, printk("sg_cmd_done: already closed, final cleanup\n")); - if (0 == sg_remove_sfp(sdp, sfp)) { /* device still present */ - scsi_device_put(sdp->device); - } - sfp = NULL; - } - } else if (srp && srp->orphan) { + write_lock_irqsave(&sfp->rq_list_lock, iflags); + if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; - else { - sg_finish_rem_req(srp); - srp = NULL; - } + else + done = 0; } - if (sfp && srp) { - /* Now wake up any sg_read() that is waiting for this packet. */ - kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); - write_lock_irqsave(&sfp->rq_list_lock, iflags); - srp->done = 1; + srp->done = done; + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + + if (likely(done)) { + /* Now wake up any sg_read() that is waiting for this + * packet. + */ wake_up_interruptible(&sfp->read_wait); - write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - } + kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); + } else + sg_finish_rem_req(srp); /* call with srp->done == 0 */ + + kref_put(&sfp->f_ref, sg_remove_sfp); } static struct file_operations sg_fops = { @@ -1364,17 +1361,18 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) printk(KERN_WARNING "kmalloc Sg_device failure\n"); return ERR_PTR(-ENOMEM); } - error = -ENOMEM; + if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) { printk(KERN_WARNING "idr expansion Sg_device failure\n"); + error = -ENOMEM; goto out; } write_lock_irqsave(&sg_index_lock, iflags); - error = idr_get_new(&sg_index_idr, sdp, &k); - write_unlock_irqrestore(&sg_index_lock, iflags); + error = idr_get_new(&sg_index_idr, sdp, &k); if (error) { + write_unlock_irqrestore(&sg_index_lock, iflags); printk(KERN_WARNING "idr allocation Sg_device failure: %d\n", error); goto out; @@ -1391,6 +1389,9 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) init_waitqueue_head(&sdp->o_excl_wait); sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments); sdp->index = k; + kref_init(&sdp->d_ref); + + write_unlock_irqrestore(&sg_index_lock, iflags); error = 0; out: @@ -1401,6 +1402,8 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) return sdp; overflow: + idr_remove(&sg_index_idr, k); + write_unlock_irqrestore(&sg_index_lock, iflags); sdev_printk(KERN_WARNING, scsidp, "Unable to attach sg device type=%d, minor " "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); @@ -1488,49 +1491,46 @@ out: return error; } -static void -sg_remove(struct device *cl_dev, struct class_interface *cl_intf) +static void sg_device_destroy(struct kref *kref) +{ + struct sg_device *sdp = container_of(kref, struct sg_device, d_ref); + unsigned long flags; + + /* CAUTION! Note that the device can still be found via idr_find() + * even though the refcount is 0. Therefore, do idr_remove() BEFORE + * any other cleanup. + */ + + write_lock_irqsave(&sg_index_lock, flags); + idr_remove(&sg_index_idr, sdp->index); + write_unlock_irqrestore(&sg_index_lock, flags); + + SCSI_LOG_TIMEOUT(3, + printk("sg_device_destroy: %s\n", + sdp->disk->disk_name)); + + put_disk(sdp->disk); + kfree(sdp); +} + +static void sg_remove(struct device *cl_dev, struct class_interface *cl_intf) { struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); Sg_device *sdp = dev_get_drvdata(cl_dev); unsigned long iflags; Sg_fd *sfp; - Sg_fd *tsfp; - Sg_request *srp; - Sg_request *tsrp; - int delay; - if (!sdp) + if (!sdp || sdp->detached) return; - delay = 0; + SCSI_LOG_TIMEOUT(3, printk("sg_remove: %s\n", sdp->disk->disk_name)); + + /* Need a write lock to set sdp->detached. */ write_lock_irqsave(&sg_index_lock, iflags); - if (sdp->headfp) { - sdp->detached = 1; - for (sfp = sdp->headfp; sfp; sfp = tsfp) { - tsfp = sfp->nextfp; - for (srp = sfp->headrp; srp; srp = tsrp) { - tsrp = srp->nextrp; - if (sfp->closed || (0 == sg_srp_done(srp, sfp))) - sg_finish_rem_req(srp); - } - if (sfp->closed) { - scsi_device_put(sdp->device); - __sg_remove_sfp(sdp, sfp); - } else { - delay = 1; - wake_up_interruptible(&sfp->read_wait); - kill_fasync(&sfp->async_qp, SIGPOLL, - POLL_HUP); - } - } - SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d, dirty\n", sdp->index)); - if (NULL == sdp->headfp) { - idr_remove(&sg_index_idr, sdp->index); - } - } else { /* nothing active, simple case */ - SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d\n", sdp->index)); - idr_remove(&sg_index_idr, sdp->index); + sdp->detached = 1; + for (sfp = sdp->headfp; sfp; sfp = sfp->nextfp) { + wake_up_interruptible(&sfp->read_wait); + kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } write_unlock_irqrestore(&sg_index_lock, iflags); @@ -1538,13 +1538,8 @@ sg_remove(struct device *cl_dev, struct class_interface *cl_intf) device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); cdev_del(sdp->cdev); sdp->cdev = NULL; - put_disk(sdp->disk); - sdp->disk = NULL; - if (NULL == sdp->headfp) - kfree(sdp); - if (delay) - msleep(10); /* dirty detach so delay device destruction */ + sg_put_dev(sdp); } module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR); @@ -1941,22 +1936,6 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) return resp; } -#ifdef CONFIG_SCSI_PROC_FS -static Sg_request * -sg_get_nth_request(Sg_fd * sfp, int nth) -{ - Sg_request *resp; - unsigned long iflags; - int k; - - read_lock_irqsave(&sfp->rq_list_lock, iflags); - for (k = 0, resp = sfp->headrp; resp && (k < nth); - ++k, resp = resp->nextrp) ; - read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; -} -#endif - /* always adds to end of list */ static Sg_request * sg_add_request(Sg_fd * sfp) @@ -2032,22 +2011,6 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp) return res; } -#ifdef CONFIG_SCSI_PROC_FS -static Sg_fd * -sg_get_nth_sfp(Sg_device * sdp, int nth) -{ - Sg_fd *resp; - unsigned long iflags; - int k; - - read_lock_irqsave(&sg_index_lock, iflags); - for (k = 0, resp = sdp->headfp; resp && (k < nth); - ++k, resp = resp->nextfp) ; - read_unlock_irqrestore(&sg_index_lock, iflags); - return resp; -} -#endif - static Sg_fd * sg_add_sfp(Sg_device * sdp, int dev) { @@ -2062,6 +2025,7 @@ sg_add_sfp(Sg_device * sdp, int dev) init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); + kref_init(&sfp->f_ref); sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; @@ -2089,15 +2053,54 @@ sg_add_sfp(Sg_device * sdp, int dev) sg_build_reserve(sfp, bufflen); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n", sfp->reserve.bufflen, sfp->reserve.k_use_sg)); + + kref_get(&sdp->d_ref); + __module_get(THIS_MODULE); return sfp; } -static void -__sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) +static void sg_remove_sfp_usercontext(struct work_struct *work) +{ + struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); + struct sg_device *sdp = sfp->parentdp; + + /* Cleanup any responses which were never read(). */ + while (sfp->headrp) + sg_finish_rem_req(sfp->headrp); + + if (sfp->reserve.bufflen > 0) { + SCSI_LOG_TIMEOUT(6, + printk("sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", + (int) sfp->reserve.bufflen, + (int) sfp->reserve.k_use_sg)); + sg_remove_scat(&sfp->reserve); + } + + SCSI_LOG_TIMEOUT(6, + printk("sg_remove_sfp: %s, sfp=0x%p\n", + sdp->disk->disk_name, + sfp)); + kfree(sfp); + + scsi_device_put(sdp->device); + sg_put_dev(sdp); + module_put(THIS_MODULE); +} + +static void sg_remove_sfp(struct kref *kref) { + struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); + struct sg_device *sdp = sfp->parentdp; Sg_fd *fp; Sg_fd *prev_fp; + unsigned long iflags; + + /* CAUTION! Note that sfp can still be found by walking sdp->headfp + * even though the refcount is now 0. Therefore, unlink sfp from + * sdp->headfp BEFORE doing any other cleanup. + */ + write_lock_irqsave(&sg_index_lock, iflags); prev_fp = sdp->headfp; if (sfp == prev_fp) sdp->headfp = prev_fp->nextfp; @@ -2110,54 +2113,10 @@ __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) prev_fp = fp; } } - if (sfp->reserve.bufflen > 0) { - SCSI_LOG_TIMEOUT(6, - printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", - (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); - sg_remove_scat(&sfp->reserve); - } - sfp->parentdp = NULL; - SCSI_LOG_TIMEOUT(6, printk("__sg_remove_sfp: sfp=0x%p\n", sfp)); - kfree(sfp); -} - -/* Returns 0 in normal case, 1 when detached and sdp object removed */ -static int -sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) -{ - Sg_request *srp; - Sg_request *tsrp; - int dirty = 0; - int res = 0; - - for (srp = sfp->headrp; srp; srp = tsrp) { - tsrp = srp->nextrp; - if (sg_srp_done(srp, sfp)) - sg_finish_rem_req(srp); - else - ++dirty; - } - if (0 == dirty) { - unsigned long iflags; + write_unlock_irqrestore(&sg_index_lock, iflags); + wake_up_interruptible(&sdp->o_excl_wait); - write_lock_irqsave(&sg_index_lock, iflags); - __sg_remove_sfp(sdp, sfp); - if (sdp->detached && (NULL == sdp->headfp)) { - idr_remove(&sg_index_idr, sdp->index); - kfree(sdp); - res = 1; - } - write_unlock_irqrestore(&sg_index_lock, iflags); - } else { - /* MOD_INC's to inhibit unloading sg and associated adapter driver */ - /* only bump the access_count if we actually succeeded in - * throwing another counter on the host module */ - scsi_device_get(sdp->device); /* XXX: retval ignored? */ - sfp->closed = 1; /* flag dirty state on this fd */ - SCSI_LOG_TIMEOUT(1, printk("sg_remove_sfp: worrisome, %d writes pending\n", - dirty)); - } - return res; + execute_in_process_context(sg_remove_sfp_usercontext, &sfp->ew); } static int @@ -2199,19 +2158,38 @@ sg_last_dev(void) } #endif -static Sg_device * -sg_get_dev(int dev) +/* must be called with sg_index_lock held */ +static Sg_device *sg_lookup_dev(int dev) { - Sg_device *sdp; - unsigned long iflags; + return idr_find(&sg_index_idr, dev); +} - read_lock_irqsave(&sg_index_lock, iflags); - sdp = idr_find(&sg_index_idr, dev); - read_unlock_irqrestore(&sg_index_lock, iflags); +static Sg_device *sg_get_dev(int dev) +{ + struct sg_device *sdp; + unsigned long flags; + + read_lock_irqsave(&sg_index_lock, flags); + sdp = sg_lookup_dev(dev); + if (!sdp) + sdp = ERR_PTR(-ENXIO); + else if (sdp->detached) { + /* If sdp->detached, then the refcount may already be 0, in + * which case it would be a bug to do kref_get(). + */ + sdp = ERR_PTR(-ENODEV); + } else + kref_get(&sdp->d_ref); + read_unlock_irqrestore(&sg_index_lock, flags); return sdp; } +static void sg_put_dev(struct sg_device *sdp) +{ + kref_put(&sdp->d_ref, sg_device_destroy); +} + #ifdef CONFIG_SCSI_PROC_FS static struct proc_dir_entry *sg_proc_sgp = NULL; @@ -2468,8 +2446,10 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v) struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; struct scsi_device *scsidp; + unsigned long iflags; - sdp = it ? sg_get_dev(it->index) : NULL; + read_lock_irqsave(&sg_index_lock, iflags); + sdp = it ? sg_lookup_dev(it->index) : NULL; if (sdp && (scsidp = sdp->device) && (!sdp->detached)) seq_printf(s, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", scsidp->host->host_no, scsidp->channel, @@ -2480,6 +2460,7 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v) (int) scsi_device_online(scsidp)); else seq_printf(s, "-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\n"); + read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } @@ -2493,16 +2474,20 @@ static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; struct scsi_device *scsidp; + unsigned long iflags; - sdp = it ? sg_get_dev(it->index) : NULL; + read_lock_irqsave(&sg_index_lock, iflags); + sdp = it ? sg_lookup_dev(it->index) : NULL; if (sdp && (scsidp = sdp->device) && (!sdp->detached)) seq_printf(s, "%8.8s\t%16.16s\t%4.4s\n", scsidp->vendor, scsidp->model, scsidp->rev); else seq_printf(s, "\n"); + read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } +/* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { int k, m, new_interface, blen, usg; @@ -2512,7 +2497,8 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) const char * cp; unsigned int ms; - for (k = 0; (fp = sg_get_nth_sfp(sdp, k)); ++k) { + for (k = 0, fp = sdp->headfp; fp != NULL; ++k, fp = fp->nextfp) { + read_lock(&fp->rq_list_lock); /* irqs already disabled */ seq_printf(s, " FD(%d): timeout=%dms bufflen=%d " "(res)sgat=%d low_dma=%d\n", k + 1, jiffies_to_msecs(fp->timeout), @@ -2522,7 +2508,9 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=%d\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan, (int) fp->closed); - for (m = 0; (srp = sg_get_nth_request(fp, m)); ++m) { + for (m = 0, srp = fp->headrp; + srp != NULL; + ++m, srp = srp->nextrp) { hp = &srp->header; new_interface = (hp->interface_id == '\0') ? 0 : 1; if (srp->res_used) { @@ -2559,6 +2547,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) } if (0 == m) seq_printf(s, " No requests active\n"); + read_unlock(&fp->rq_list_lock); } } @@ -2571,39 +2560,34 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; + unsigned long iflags; if (it && (0 == it->index)) { seq_printf(s, "max_active_device=%d(origin 1)\n", (int)it->max); seq_printf(s, " def_reserved_size=%d\n", sg_big_buff); } - sdp = it ? sg_get_dev(it->index) : NULL; - if (sdp) { - struct scsi_device *scsidp = sdp->device; - if (NULL == scsidp) { - seq_printf(s, "device %d detached ??\n", - (int)it->index); - return 0; - } + read_lock_irqsave(&sg_index_lock, iflags); + sdp = it ? sg_lookup_dev(it->index) : NULL; + if (sdp && sdp->headfp) { + struct scsi_device *scsidp = sdp->device; - if (sg_get_nth_sfp(sdp, 0)) { - seq_printf(s, " >>> device=%s ", - sdp->disk->disk_name); - if (sdp->detached) - seq_printf(s, "detached pending close "); - else - seq_printf - (s, "scsi%d chan=%d id=%d lun=%d em=%d", - scsidp->host->host_no, - scsidp->channel, scsidp->id, - scsidp->lun, - scsidp->host->hostt->emulated); - seq_printf(s, " sg_tablesize=%d excl=%d\n", - sdp->sg_tablesize, sdp->exclude); - } + seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); + if (sdp->detached) + seq_printf(s, "detached pending close "); + else + seq_printf + (s, "scsi%d chan=%d id=%d lun=%d em=%d", + scsidp->host->host_no, + scsidp->channel, scsidp->id, + scsidp->lun, + scsidp->host->hostt->emulated); + seq_printf(s, " sg_tablesize=%d excl=%d\n", + sdp->sg_tablesize, sdp->exclude); sg_proc_debug_helper(s, sdp); } + read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } -- cgit v1.2.3 From a2dd3b4cea335713b58996bb07b3abcde1175f47 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Tue, 20 Jan 2009 17:00:09 -0500 Subject: [SCSI] sg: fix races with ioctl(SG_IO) sg_io_owned needs to be set before the command is sent to the midlevel; otherwise, a quickly-completing command may cause a different CPU to see "srp->done == 1 && !srp->sg_io_owned", which would lead to incorrect behavior. Check srp->done and set srp->orphan while holding rq_list_lock to prevent races with sg_rq_end_io(). There is no need to check sfp->closed from read/write/ioctl/poll/etc. since the kernel guarantees that this won't happen. The usefulness of sg_srp_done() was questionable before; now it is definitely not needed. Signed-off-by: Tony Battersby Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index b447527555a7..18d079e3990e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -189,7 +189,7 @@ static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp); static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, size_t count, int blocking, - int read_only, Sg_request **o_srp); + int read_only, int sg_io_owned, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); @@ -561,7 +561,8 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) return -EFAULT; blocking = !(filp->f_flags & O_NONBLOCK); if (old_hdr.reply_len < 0) - return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL); + return sg_new_write(sfp, filp, buf, count, + blocking, 0, 0, NULL); if (count < (SZ_SG_HEADER + 6)) return -EIO; /* The minimum scsi command length is 6 bytes. */ @@ -642,7 +643,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, - size_t count, int blocking, int read_only, + size_t count, int blocking, int read_only, int sg_io_owned, Sg_request **o_srp) { int k; @@ -662,6 +663,7 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, SCSI_LOG_TIMEOUT(1, printk("sg_new_write: queue full\n")); return -EDOM; } + srp->sg_io_owned = sg_io_owned; hp = &srp->header; if (__copy_from_user(hp, buf, SZ_SG_IO_HDR)) { sg_remove_request(sfp, srp); @@ -765,18 +767,6 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, return 0; } -static int -sg_srp_done(Sg_request *srp, Sg_fd *sfp) -{ - unsigned long iflags; - int done; - - read_lock_irqsave(&sfp->rq_list_lock, iflags); - done = srp->done; - read_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return done; -} - static int sg_ioctl(struct inode *inode, struct file *filp, unsigned int cmd_in, unsigned long arg) @@ -809,27 +799,26 @@ sg_ioctl(struct inode *inode, struct file *filp, return -EFAULT; result = sg_new_write(sfp, filp, p, SZ_SG_IO_HDR, - blocking, read_only, &srp); + blocking, read_only, 1, &srp); if (result < 0) return result; - srp->sg_io_owned = 1; while (1) { result = 0; /* following macro to beat race condition */ __wait_event_interruptible(sfp->read_wait, - (sdp->detached || sfp->closed || sg_srp_done(srp, sfp)), - result); + (srp->done || sdp->detached), + result); if (sdp->detached) return -ENODEV; - if (sfp->closed) - return 0; /* request packet dropped already */ - if (0 == result) + write_lock_irq(&sfp->rq_list_lock); + if (srp->done) { + srp->done = 2; + write_unlock_irq(&sfp->rq_list_lock); break; + } srp->orphan = 1; + write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ } - write_lock_irqsave(&sfp->rq_list_lock, iflags); - srp->done = 2; - write_unlock_irqrestore(&sfp->rq_list_lock, iflags); result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp); return (result < 0) ? result : 0; } -- cgit v1.2.3 From c96952ed7031e7c576ecf90cf95b8ec099d5295a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 4 Feb 2009 11:36:27 +0900 Subject: [SCSI] sg: avoid blk_put_request/blk_rq_unmap_user in interrupt This fixes the following oops: http://marc.info/?l=linux-kernel&m=123316111415677&w=2 You can reproduce this bug by interrupting a program before a sg response completes. This leads to the special sg state (the orphan state), then sg calls blk_put_request in interrupt (rq->end_io). The above bug report shows the recursive lock problem because sg calls blk_put_request in interrupt. We could call __blk_put_request here instead however we also need to handle blk_rq_unmap_user here, which can't be called in interrupt too. In the orphan state, we don't need to care about the data transfer (the program revoked the command) so adding 'just free the resource' mode to blk_rq_unmap_user is a possible option. I prefer to avoid complicating the blk mapping API when possible. I change the orphan state to call sg_finish_rem_req via execute_in_process_context. We hold sg_fd->kref so sg_fd doesn't go away until keventd_wq finishes our work. copy_from_user/to_user fails so blk_rq_unmap_user just frees the resource without the data transfer. Signed-off-by: FUJITA Tomonori Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 18d079e3990e..cdd83cf97100 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -138,6 +138,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ volatile char done; /* 0->before bh, 1->before read, 2->read */ struct request *rq; struct bio *bio; + struct execute_work ew; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -1234,6 +1235,15 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } +static void sg_rq_end_io_usercontext(struct work_struct *work) +{ + struct sg_request *srp = container_of(work, struct sg_request, ew.work); + struct sg_fd *sfp = srp->parentfp; + + sg_finish_rem_req(srp); + kref_put(&sfp->f_ref, sg_remove_sfp); +} + /* * This function is a "bottom half" handler that is called by the mid * level when a command is completed (or has failed). @@ -1312,10 +1322,9 @@ static void sg_rq_end_io(struct request *rq, int uptodate) */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); + kref_put(&sfp->f_ref, sg_remove_sfp); } else - sg_finish_rem_req(srp); /* call with srp->done == 0 */ - - kref_put(&sfp->f_ref, sg_remove_sfp); + execute_in_process_context(sg_rq_end_io_usercontext, &srp->ew); } static struct file_operations sg_fops = { -- cgit v1.2.3 From 2134bc72ddc04d412c0c2ce93a9c6f19de6cac35 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Feb 2009 02:42:56 +0900 Subject: [SCSI] sg: remove unnecessary function declarations Signed-off-by: FUJITA Tomonori Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cdd83cf97100..f493d7fca348 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -101,7 +101,6 @@ static int scatter_elem_sz_prev = SG_SCATTER_SZ; #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) static int sg_add(struct device *, struct class_interface *); -static void sg_device_destroy(struct kref *kref); static void sg_remove(struct device *, struct class_interface *); static DEFINE_IDR(sg_index_idr); @@ -178,14 +177,11 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ struct kref d_ref; } Sg_device; -static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ static void sg_rq_end_io(struct request *rq, int uptodate); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); -static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, - int tablesize); static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp); static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, @@ -204,12 +200,8 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static int sg_res_in_use(Sg_fd * sfp); -static Sg_device *sg_lookup_dev(int dev); static Sg_device *sg_get_dev(int dev); static void sg_put_dev(Sg_device *sdp); -#ifdef CONFIG_SCSI_PROC_FS -static int sg_last_dev(void); -#endif #define SZ_SG_HEADER sizeof(struct sg_header) #define SZ_SG_IO_HDR sizeof(sg_io_hdr_t) -- cgit v1.2.3 From b2ed6c69aa3c1a3f496e2a72f770d53069371df3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Feb 2009 02:42:57 +0900 Subject: [SCSI] sg: use ALIGN macro This changes sg_build_indirect() to use ALIGN macro instead of calculating by hand. Signed-off-by: FUJITA Tomonori Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f493d7fca348..d45ac4aab92f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -98,7 +98,6 @@ static int scatter_elem_sz = SG_SCATTER_SZ; static int scatter_elem_sz_prev = SG_SCATTER_SZ; #define SG_SECTOR_SZ 512 -#define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) static int sg_add(struct device *, struct class_interface *); static void sg_remove(struct device *, struct class_interface *); @@ -1723,8 +1722,8 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) return -EFAULT; if (0 == blk_size) ++blk_size; /* don't know why */ -/* round request up to next highest SG_SECTOR_SZ byte boundary */ - blk_size = (blk_size + SG_SECTOR_MSK) & (~SG_SECTOR_MSK); + /* round request up to next highest SG_SECTOR_SZ byte boundary */ + blk_size = ALIGN(blk_size, SG_SECTOR_SZ); SCSI_LOG_TIMEOUT(4, printk("sg_build_indirect: buff_size=%d, blk_size=%d\n", buff_size, blk_size)); -- cgit v1.2.3 From 3442f802a8169a0c18d411d95f0e71b9205ed607 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Feb 2009 13:26:53 +0900 Subject: [SCSI] sg: remove the own list management for struct sg_fd This replaces the own list management for struct sg_fd with the standard list_head structure. Signed-off-by: FUJITA Tomonori Acked-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/sg.c | 50 ++++++++++++++------------------------------------ 1 file changed, 14 insertions(+), 36 deletions(-) (limited to 'drivers/scsi/sg.c') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d45ac4aab92f..7bf54c9a33bb 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -140,7 +140,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ - struct sg_fd *nextfp; /* NULL when last opened fd on this device */ + struct list_head sfd_siblings; struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ @@ -167,7 +167,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ wait_queue_head_t o_excl_wait; /* queue open() when O_EXCL in use */ int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ - Sg_fd *headfp; /* first open fd belonging to this device */ + struct list_head sfds; volatile char detached; /* 0->attached, 1->detached pending removal */ volatile char exclude; /* opened for exclusive access */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ @@ -258,13 +258,13 @@ sg_open(struct inode *inode, struct file *filp) retval = -EPERM; /* Can't lock it with read only access */ goto error_out; } - if (sdp->headfp && (flags & O_NONBLOCK)) { + if (!list_empty(&sdp->sfds) && (flags & O_NONBLOCK)) { retval = -EBUSY; goto error_out; } res = 0; __wait_event_interruptible(sdp->o_excl_wait, - ((sdp->headfp || sdp->exclude) ? 0 : (sdp->exclude = 1)), res); + ((!list_empty(&sdp->sfds) || sdp->exclude) ? 0 : (sdp->exclude = 1)), res); if (res) { retval = res; /* -ERESTARTSYS because signal hit process */ goto error_out; @@ -286,7 +286,7 @@ sg_open(struct inode *inode, struct file *filp) retval = -ENODEV; goto error_out; } - if (!sdp->headfp) { /* no existing opens on this device */ + if (list_empty(&sdp->sfds)) { /* no existing opens on this device */ sdp->sgdebug = 0; q = sdp->device->request_queue; sdp->sg_tablesize = min(q->max_hw_segments, @@ -1375,6 +1375,7 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) disk->first_minor = k; sdp->disk = disk; sdp->device = scsidp; + INIT_LIST_HEAD(&sdp->sfds); init_waitqueue_head(&sdp->o_excl_wait); sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments); sdp->index = k; @@ -1517,7 +1518,7 @@ static void sg_remove(struct device *cl_dev, struct class_interface *cl_intf) /* Need a write lock to set sdp->detached. */ write_lock_irqsave(&sg_index_lock, iflags); sdp->detached = 1; - for (sfp = sdp->headfp; sfp; sfp = sfp->nextfp) { + list_for_each_entry(sfp, &sdp->sfds, sfd_siblings) { wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } @@ -2024,14 +2025,7 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; write_lock_irqsave(&sg_index_lock, iflags); - if (!sdp->headfp) - sdp->headfp = sfp; - else { /* add to tail of existing list */ - Sg_fd *pfp = sdp->headfp; - while (pfp->nextfp) - pfp = pfp->nextfp; - pfp->nextfp = sfp; - } + list_add_tail(&sfp->sfd_siblings, &sdp->sfds); write_unlock_irqrestore(&sg_index_lock, iflags); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); if (unlikely(sg_big_buff != def_reserved_size)) @@ -2080,28 +2074,10 @@ static void sg_remove_sfp(struct kref *kref) { struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); struct sg_device *sdp = sfp->parentdp; - Sg_fd *fp; - Sg_fd *prev_fp; unsigned long iflags; - /* CAUTION! Note that sfp can still be found by walking sdp->headfp - * even though the refcount is now 0. Therefore, unlink sfp from - * sdp->headfp BEFORE doing any other cleanup. - */ - write_lock_irqsave(&sg_index_lock, iflags); - prev_fp = sdp->headfp; - if (sfp == prev_fp) - sdp->headfp = prev_fp->nextfp; - else { - while ((fp = prev_fp->nextfp)) { - if (sfp == fp) { - prev_fp->nextfp = fp->nextfp; - break; - } - prev_fp = fp; - } - } + list_del(&sfp->sfd_siblings); write_unlock_irqrestore(&sg_index_lock, iflags); wake_up_interruptible(&sdp->o_excl_wait); @@ -2486,10 +2462,12 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) const char * cp; unsigned int ms; - for (k = 0, fp = sdp->headfp; fp != NULL; ++k, fp = fp->nextfp) { + k = 0; + list_for_each_entry(fp, &sdp->sfds, sfd_siblings) { + k++; read_lock(&fp->rq_list_lock); /* irqs already disabled */ seq_printf(s, " FD(%d): timeout=%dms bufflen=%d " - "(res)sgat=%d low_dma=%d\n", k + 1, + "(res)sgat=%d low_dma=%d\n", k, jiffies_to_msecs(fp->timeout), fp->reserve.bufflen, (int) fp->reserve.k_use_sg, @@ -2559,7 +2537,7 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v) read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; - if (sdp && sdp->headfp) { + if (sdp && !list_empty(&sdp->sfds)) { struct scsi_device *scsidp = sdp->device; seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); -- cgit v1.2.3