diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-04 12:39:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-04 12:39:14 -0700 |
commit | 58e57fbd1c7e8833314459555e337364fe5521f3 (patch) | |
tree | 242a3859387588889c9dcc45915b0dec951f84c3 | |
parent | 8a0382f6fceaf0c6479e582e1054f36333ea3d24 (diff) | |
parent | 0f78ab9899e9d6acb09d5465def618704255963b (diff) | |
download | lwn-58e57fbd1c7e8833314459555e337364fe5521f3.tar.gz lwn-58e57fbd1c7e8833314459555e337364fe5521f3.zip |
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (41 commits)
Revert "Seperate read and write statistics of in_flight requests"
cfq-iosched: don't delay async queue if it hasn't dispatched at all
block: Topology ioctls
cfq-iosched: use assigned slice sync value, not default
cfq-iosched: rename 'desktop' sysfs entry to 'low_latency'
cfq-iosched: implement slower async initiate and queue ramp up
cfq-iosched: delay async IO dispatch, if sync IO was just done
cfq-iosched: add a knob for desktop interactiveness
Add a tracepoint for block request remapping
block: allow large discard requests
block: use normal I/O path for discard requests
swapfile: avoid NULL pointer dereference in swapon when s_bdev is NULL
fs/bio.c: move EXPORT* macros to line after function
Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs
cciss: fix build when !PROC_FS
block: Do not clamp max_hw_sectors for stacking devices
block: Set max_sectors correctly for stacking devices
cciss: cciss_host_attr_groups should be const
cciss: Dynamically allocate the drive_info_struct for each logical drive.
cciss: Add usage_count attribute to each logical drive in /sys
...
-rw-r--r-- | Documentation/ABI/testing/sysfs-bus-pci-devices-cciss | 28 | ||||
-rw-r--r-- | block/blk-barrier.c | 45 | ||||
-rw-r--r-- | block/blk-core.c | 21 | ||||
-rw-r--r-- | block/blk-merge.c | 2 | ||||
-rw-r--r-- | block/blk-settings.c | 34 | ||||
-rw-r--r-- | block/blk-sysfs.c | 11 | ||||
-rw-r--r-- | block/cfq-iosched.c | 63 | ||||
-rw-r--r-- | block/compat_ioctl.c | 13 | ||||
-rw-r--r-- | block/genhd.c | 4 | ||||
-rw-r--r-- | block/ioctl.c | 17 | ||||
-rw-r--r-- | drivers/block/DAC960.c | 156 | ||||
-rw-r--r-- | drivers/block/cciss.c | 753 | ||||
-rw-r--r-- | drivers/block/cciss.h | 12 | ||||
-rw-r--r-- | drivers/block/cpqarray.c | 63 | ||||
-rw-r--r-- | drivers/md/dm.c | 16 | ||||
-rw-r--r-- | drivers/mtd/mtd_blkdevs.c | 19 | ||||
-rw-r--r-- | drivers/staging/dst/dcore.c | 2 | ||||
-rw-r--r-- | fs/bio.c | 49 | ||||
-rw-r--r-- | fs/partitions/check.c | 12 | ||||
-rw-r--r-- | include/linux/blkdev.h | 48 | ||||
-rw-r--r-- | include/linux/blktrace_api.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/genhd.h | 21 | ||||
-rw-r--r-- | include/trace/events/block.h | 33 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 39 | ||||
-rw-r--r-- | mm/swapfile.c | 12 |
26 files changed, 999 insertions, 480 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index 0a92a7c93a62..4f29e5f1ebfa 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -31,3 +31,31 @@ Date: March 2009 Kernel Version: 2.6.30 Contact: iss_storagedev@hp.com Description: A symbolic link to /sys/block/cciss!cXdY + +Where: /sys/bus/pci/devices/<dev>/ccissX/rescan +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Kicks off a rescan of the controller to discover logical + drive topology changes. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the 8-byte LUN ID used to address logical + drive Y of controller X. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the RAID level of logical drive Y of + controller X. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the usage count (number of opens) of logical drive Y + of controller X. diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6593ab39cfe9..8873b9b439ff 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) if (bio->bi_private) complete(bio->bi_private); + __free_page(bio_page(bio)); bio_put(bio); } @@ -372,30 +373,50 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = flags & DISCARD_FL_BARRIER ? 
DISCARD_BARRIER : DISCARD_NOBARRIER; + struct bio *bio; + struct page *page; int ret = 0; if (!q) return -ENXIO; - if (!q->prepare_discard_fn) + if (!blk_queue_discard(q)) return -EOPNOTSUPP; while (nr_sects && !ret) { - struct bio *bio = bio_alloc(gfp_mask, 0); - if (!bio) - return -ENOMEM; + unsigned int sector_size = q->limits.logical_block_size; + unsigned int max_discard_sectors = + min(q->limits.max_discard_sectors, UINT_MAX >> 9); + bio = bio_alloc(gfp_mask, 1); + if (!bio) + goto out; + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; if (flags & DISCARD_FL_WAIT) bio->bi_private = &wait; - bio->bi_sector = sector; + /* + * Add a zeroed one-sector payload as that's what + * our current implementations need. If we'll ever need + * more the interface will need revisiting. + */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_free_bio; + if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) + goto out_free_page; - if (nr_sects > queue_max_hw_sectors(q)) { - bio->bi_size = queue_max_hw_sectors(q) << 9; - nr_sects -= queue_max_hw_sectors(q); - sector += queue_max_hw_sectors(q); + /* + * And override the bio size - the way discard works we + * touch many more blocks on disk than the actual payload + * length. 
+ */ + if (nr_sects > max_discard_sectors) { + bio->bi_size = max_discard_sectors << 9; + nr_sects -= max_discard_sectors; + sector += max_discard_sectors; } else { bio->bi_size = nr_sects << 9; nr_sects = 0; @@ -414,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_put(bio); } return ret; +out_free_page: + __free_page(page); +out_free_bio: + bio_put(bio); +out: + return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index 8135228e4b29..81f34311659a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include "blk.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); @@ -69,7 +70,7 @@ static void drive_stat_acct(struct request *rq, int new_io) part_stat_inc(cpu, part, merges[rw]); else { part_round_stats(cpu, part); - part_inc_in_flight(part, rw); + part_inc_in_flight(part); } part_stat_unlock(); @@ -1031,7 +1032,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part, if (part->in_flight) { __part_stat_add(cpu, part, time_in_queue, - part_in_flight(part) * (now - part->stamp)); + part->in_flight * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; @@ -1124,7 +1125,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_DISCARD; if (bio_rw_flagged(bio, BIO_RW_BARRIER)) req->cmd_flags |= REQ_SOFTBARRIER; - req->q->prepare_discard_fn(req->q, req); } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) req->cmd_flags |= REQ_HARDBARRIER; @@ -1437,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { + if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio 
too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1470,7 +1471,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (bio_rw_flagged(bio, BIO_RW_DISCARD) && - !q->prepare_discard_fn) { + !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } @@ -1738,7 +1739,7 @@ static void blk_account_io_done(struct request *req) part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); part_round_stats(cpu, part); - part_dec_in_flight(part, rw); + part_dec_in_flight(part); part_stat_unlock(); } @@ -2491,6 +2492,14 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay) +{ + return queue_delayed_work(kblockd_workqueue, work, delay); +} +EXPORT_SYMBOL(kblockd_schedule_delayed_work); + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-merge.c b/block/blk-merge.c index 99cb5cf1f447..b0de8574fdc8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -351,7 +351,7 @@ static void blk_account_io_merge(struct request *req) part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_round_stats(cpu, part); - part_dec_in_flight(part, rq_data_dir(req)); + part_dec_in_flight(part); part_stat_unlock(); } diff --git a/block/blk-settings.c b/block/blk-settings.c index 83413ff83739..e0695bca7027 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) EXPORT_SYMBOL(blk_queue_prep_rq); /** - * blk_queue_set_discard - set a discard_sectors function for queue - * @q: queue - * @dfn: prepare_discard function - * - * It's possible for a queue to register a discard callback which is used - * to transform a discard request into the appropriate type for the - * hardware. 
If none is registered, then discard requests are failed - * with %EOPNOTSUPP. - * - */ -void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) -{ - q->prepare_discard_fn = dfn; -} -EXPORT_SYMBOL(blk_queue_set_discard); - -/** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue * @mbfn: merge_bvec_fn @@ -111,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_hw_segments = MAX_HW_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; + lim->max_sectors = BLK_DEF_MAX_SECTORS; + lim->max_hw_sectors = INT_MAX; + lim->max_discard_sectors = SAFE_MAX_SECTORS; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -164,6 +149,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->unplug_timer.data = (unsigned long)q; blk_set_default_limits(&q->limits); + blk_queue_max_sectors(q, SAFE_MAX_SECTORS); /* * If the caller didn't supply a lock, fall back to our embedded @@ -254,6 +240,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) EXPORT_SYMBOL(blk_queue_max_hw_sectors); /** + * blk_queue_max_discard_sectors - set max sectors for a single discard + * @q: the request queue for the device + * @max_discard_sectors: maximum number of sectors to discard + **/ +void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors) +{ + q->limits.max_discard_sectors = max_discard_sectors; +} +EXPORT_SYMBOL(blk_queue_max_discard_sectors); + +/** * blk_queue_max_phys_segments - set max phys segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b78c9c3e2670..8a6d81afb284 100644 --- a/block/blk-sysfs.c +++ 
b/block/blk-sysfs.c @@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk) if (ret) { kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); + blk_trace_remove_sysfs(disk_to_dev(disk)); return ret; } @@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk) if (WARN_ON(!q)) return; - if (q->request_fn) { + if (q->request_fn) elv_unregister_queue(q); - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); - kobject_put(&disk_to_dev(disk)->kobj); - } + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + blk_trace_remove_sysfs(disk_to_dev(disk)); + kobject_put(&disk_to_dev(disk)->kobj); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1ca813b16e78..9c4b679908f4 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -150,7 +150,7 @@ struct cfq_data { * idle window management */ struct timer_list idle_slice_timer; - struct work_struct unplug_work; + struct delayed_work unplug_work; struct cfq_queue *active_queue; struct cfq_io_context *active_cic; @@ -173,6 +173,7 @@ struct cfq_data { unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; + unsigned int cfq_latency; struct list_head cic_list; @@ -180,6 +181,8 @@ struct cfq_data { * Fallback dummy cfqq for extreme OOM conditions */ struct cfq_queue oom_cfqq; + + unsigned long last_end_sync_rq; }; enum cfqq_state_flags { @@ -265,11 +268,13 @@ static inline int cfq_bio_sync(struct bio *bio) * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ -static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) +static inline void cfq_schedule_dispatch(struct cfq_data *cfqd, + unsigned long delay) { if (cfqd->busy_queues) { cfq_log(cfqd, "schedule dispatch"); - kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); + kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work, + delay); } } @@ -1326,12 +1331,30 @@ static int cfq_dispatch_requests(struct 
request_queue *q, int force) return 0; /* - * we are the only queue, allow up to 4 times of 'quantum' + * Sole queue user, allow bigger slice */ - if (cfqq->dispatched >= 4 * max_dispatch) - return 0; + max_dispatch *= 4; + } + + /* + * Async queues must wait a bit before being allowed dispatch. + * We also ramp up the dispatch depth gradually for async IO, + * based on the last sync IO we serviced + */ + if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { + unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; + unsigned int depth; + + depth = last_sync / cfqd->cfq_slice[1]; + if (!depth && !cfqq->dispatched) + depth = 1; + if (depth < max_dispatch) + max_dispatch = depth; } + if (cfqq->dispatched >= max_dispatch) + return 0; + /* * Dispatch a request from this cfqq */ @@ -1376,7 +1399,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (unlikely(cfqd->active_queue == cfqq)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); } kmem_cache_free(cfq_pool, cfqq); @@ -1471,7 +1494,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (unlikely(cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); } cfq_put_queue(cfqq); @@ -1951,7 +1974,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (cfqd->hw_tag && CIC_SEEKY(cic))) + (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { if (cic->ttime_mean > cfqd->cfq_slice_idle) @@ -2157,8 +2180,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) if (cfq_cfqq_sync(cfqq)) cfqd->sync_flight--; - if (sync) + if (sync) { RQ_CIC(rq)->last_end_request = now; + cfqd->last_end_sync_rq = now; + } /* * If this is the active queue, check 
if it needs to be expired, @@ -2186,7 +2211,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } if (!rq_in_driver(cfqd)) - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); } /* @@ -2316,7 +2341,7 @@ queue_fail: if (cic) put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); return 1; @@ -2325,7 +2350,7 @@ queue_fail: static void cfq_kick_queue(struct work_struct *work) { struct cfq_data *cfqd = - container_of(work, struct cfq_data, unplug_work); + container_of(work, struct cfq_data, unplug_work.work); struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); @@ -2379,7 +2404,7 @@ static void cfq_idle_slice_timer(unsigned long data) expire: cfq_slice_expired(cfqd, timed_out); out_kick: - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } @@ -2387,7 +2412,7 @@ out_cont: static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { del_timer_sync(&cfqd->idle_slice_timer); - cancel_work_sync(&cfqd->unplug_work); + cancel_delayed_work_sync(&cfqd->unplug_work); } static void cfq_put_async_queues(struct cfq_data *cfqd) @@ -2469,7 +2494,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->idle_slice_timer.function = cfq_idle_slice_timer; cfqd->idle_slice_timer.data = (unsigned long) cfqd; - INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); + INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue); cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; @@ -2480,8 +2505,9 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->cfq_latency = 1; cfqd->hw_tag = 1; - + cfqd->last_end_sync_rq = jiffies; return cfqd; } @@ -2549,6 +2575,7 @@ 
SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); +SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -2580,6 +2607,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); #undef STORE_FUNCTION #define CFQ_ATTR(name) \ @@ -2595,6 +2623,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_async), CFQ_ATTR(slice_async_rq), CFQ_ATTR(slice_idle), + CFQ_ATTR(low_latency), __ATTR_NULL }; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7865a34e0faa..9bd086c1a4d5 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val) return put_user(val, (compat_int_t __user *)compat_ptr(arg)); } +static int compat_put_uint(unsigned long arg, unsigned int val) +{ + return put_user(val, (compat_uint_t __user *)compat_ptr(arg)); +} + static int compat_put_long(unsigned long arg, long val) { return put_user(val, (compat_long_t __user *)compat_ptr(arg)); @@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) switch (cmd) { case HDIO_GETGEO: return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); + case BLKPBSZGET: + return compat_put_uint(arg, bdev_physical_block_size(bdev)); + case BLKIOMIN: + return compat_put_uint(arg, bdev_io_min(bdev)); + case BLKIOOPT: + return compat_put_uint(arg, bdev_io_opt(bdev)); + case BLKALIGNOFF: + return compat_put_int(arg, bdev_alignment_offset(bdev)); case BLKFLSBUF: case BLKROSET: 
case BLKDISCARD: diff --git a/block/genhd.c b/block/genhd.c index 517e4332cb37..5a0861da324d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -869,7 +869,6 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -889,7 +888,6 @@ static struct attribute *disk_attrs[] = { &dev_attr_alignment_offset.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, - &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif @@ -1055,7 +1053,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_read(hd, merges[1]), (unsigned long long)part_stat_read(hd, sectors[1]), jiffies_to_msecs(part_stat_read(hd, ticks[1])), - part_in_flight(hd), + hd->in_flight, jiffies_to_msecs(part_stat_read(hd, io_ticks)), jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); diff --git a/block/ioctl.c b/block/ioctl.c index d3e6b5827a34..1f4d1de12b09 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val) return put_user(val, (int __user *)arg); } +static int put_uint(unsigned long arg, unsigned int val) +{ + return put_user(val, (unsigned int __user *)arg); +} + static int put_long(unsigned long arg, long val) { return put_user(val, (long __user *)arg); @@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); - case BLKBSZGET: /* get the logical block size (cf. 
BLKSSZGET) */ + case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ return put_int(arg, block_size(bdev)); - case BLKSSZGET: /* get block device hardware sector size */ + case BLKSSZGET: /* get block device logical block size */ return put_int(arg, bdev_logical_block_size(bdev)); + case BLKPBSZGET: /* get block device physical block size */ + return put_uint(arg, bdev_physical_block_size(bdev)); + case BLKIOMIN: + return put_uint(arg, bdev_io_min(bdev)); + case BLKIOOPT: + return put_uint(arg, bdev_io_opt(bdev)); + case BLKALIGNOFF: + return put_int(arg, bdev_alignment_offset(bdev)); case BLKSECTGET: return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); case BLKRASET: diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 6fa7b0fdbdfd..eb4fa1943944 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/reboot.h> #include <linux/spinlock.h> #include <linux/timer.h> @@ -6422,16 +6423,10 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, return true; } - -/* - DAC960_ProcReadStatus implements reading /proc/rd/status. 
-*/ - -static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_proc_show(struct seq_file *m, void *v) { unsigned char *StatusMessage = "OK\n"; - int ControllerNumber, BytesAvailable; + int ControllerNumber; for (ControllerNumber = 0; ControllerNumber < DAC960_ControllerCount; ControllerNumber++) @@ -6444,52 +6439,49 @@ static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset, break; } } - BytesAvailable = strlen(StatusMessage) - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &StatusMessage[Offset], Count); - return Count; + seq_puts(m, StatusMessage); + return 0; } +static int dac960_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_proc_show, NULL); +} -/* - DAC960_ProcReadInitialStatus implements reading /proc/rd/cN/initial_status. -*/ +static const struct file_operations dac960_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadInitialStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_initial_status_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; - int BytesAvailable = Controller->InitialStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->CombinedStatusBuffer[Offset], Count); - return Count; + DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; + seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer); + return 0; } +static int dac960_initial_status_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, 
dac960_initial_status_proc_show, PDE(inode)->data); +} -/* - DAC960_ProcReadCurrentStatus implements reading /proc/rd/cN/current_status. -*/ +static const struct file_operations dac960_initial_status_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_initial_status_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_current_status_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; + DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; unsigned char *StatusMessage = "No Rebuild or Consistency Check in Progress\n"; int ProgressMessageLength = strlen(StatusMessage); - int BytesAvailable; if (jiffies != Controller->LastCurrentStatusTime) { Controller->CurrentStatusLength = 0; @@ -6513,49 +6505,41 @@ static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset, } Controller->LastCurrentStatusTime = jiffies; } - BytesAvailable = Controller->CurrentStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->CurrentStatusBuffer[Offset], Count); - return Count; + seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer); + return 0; } +static int dac960_current_status_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_current_status_proc_show, PDE(inode)->data); +} -/* - DAC960_ProcReadUserCommand implements reading /proc/rd/cN/user_command. 
-*/ +static const struct file_operations dac960_current_status_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_current_status_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadUserCommand(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_user_command_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; - int BytesAvailable = Controller->UserStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->UserStatusBuffer[Offset], Count); - return Count; -} + DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; + seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer); + return 0; +} -/* - DAC960_ProcWriteUserCommand implements writing /proc/rd/cN/user_command. -*/ +static int dac960_user_command_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_user_command_proc_show, PDE(inode)->data); +} -static int DAC960_ProcWriteUserCommand(struct file *file, +static ssize_t dac960_user_command_proc_write(struct file *file, const char __user *Buffer, - unsigned long Count, void *Data) + size_t Count, loff_t *pos) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; + DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data; unsigned char CommandBuffer[80]; int Length; if (Count > sizeof(CommandBuffer)-1) return -EINVAL; @@ -6572,6 +6556,14 @@ static int DAC960_ProcWriteUserCommand(struct file *file, ? 
Count : -EBUSY); } +static const struct file_operations dac960_user_command_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_user_command_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = dac960_user_command_proc_write, +}; /* DAC960_CreateProcEntries creates the /proc/rd/... entries for the @@ -6586,23 +6578,17 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller) if (DAC960_ProcDirectoryEntry == NULL) { DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL); - StatusProcEntry = create_proc_read_entry("status", 0, + StatusProcEntry = proc_create("status", 0, DAC960_ProcDirectoryEntry, - DAC960_ProcReadStatus, NULL); + &dac960_proc_fops); } sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber); ControllerProcEntry = proc_mkdir(Controller->ControllerName, DAC960_ProcDirectoryEntry); - create_proc_read_entry("initial_status", 0, ControllerProcEntry, - DAC960_ProcReadInitialStatus, Controller); - create_proc_read_entry("current_status", 0, ControllerProcEntry, - DAC960_ProcReadCurrentStatus, Controller); - UserCommandProcEntry = - create_proc_read_entry("user_command", S_IWUSR | S_IRUSR, - ControllerProcEntry, DAC960_ProcReadUserCommand, - Controller); - UserCommandProcEntry->write_proc = DAC960_ProcWriteUserCommand; + proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller); + proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller); + UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller); Controller->ControllerProcEntry = ControllerProcEntry; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 1ece0b47b581..fb5be2d95d52 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -36,9 +36,11 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> +#include 
<linux/jiffies.h> #include <linux/hdreg.h> #include <linux/spinlock.h> #include <linux/compat.h> +#include <linux/mutex.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -155,6 +157,10 @@ static struct board_type products[] = { static ctlr_info_t *hba[MAX_CTLR]; +static struct task_struct *cciss_scan_thread; +static DEFINE_MUTEX(scan_mutex); +static LIST_HEAD(scan_q); + static void do_cciss_request(struct request_queue *q); static irqreturn_t do_cciss_intr(int irq, void *dev_id); static int cciss_open(struct block_device *bdev, fmode_t mode); @@ -164,9 +170,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int cciss_revalidate(struct gendisk *disk); -static int rebuild_lun_table(ctlr_info_t *h, int first_time); +static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl); static int deregister_disk(ctlr_info_t *h, int drv_index, - int clear_all); + int clear_all, int via_ioctl); static void cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size, unsigned int *block_size); @@ -189,8 +195,13 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c, static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c); static void fail_all_cmds(unsigned long ctlr); +static int add_to_scan_list(struct ctlr_info *h); static int scan_thread(void *data); static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); +static void cciss_hba_release(struct device *dev); +static void cciss_device_release(struct device *dev); +static void cciss_free_gendisk(ctlr_info_t *h, int drv_index); +static void cciss_free_drive_info(ctlr_info_t *h, int drv_index); #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); @@ -245,7 +256,10 @@ static inline void removeQ(CommandList_struct *c) #include "cciss_scsi.c" /* For SCSI tape support */ -#define RAID_UNKNOWN 6 +static const char *raid_label[] = { "0", "4", 
"1(1+0)", "5", "5+1", "ADG", + "UNKNOWN" +}; +#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1) #ifdef CONFIG_PROC_FS @@ -255,9 +269,6 @@ static inline void removeQ(CommandList_struct *c) #define ENG_GIG 1000000000 #define ENG_GIG_FACTOR (ENG_GIG/512) #define ENGAGE_SCSI "engage scsi" -static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", - "UNKNOWN" -}; static struct proc_dir_entry *proc_cciss; @@ -318,7 +329,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v) ctlr_info_t *h = seq->private; unsigned ctlr = h->ctlr; loff_t *pos = v; - drive_info_struct *drv = &h->drv[*pos]; + drive_info_struct *drv = h->drv[*pos]; if (*pos > h->highest_lun) return 0; @@ -331,7 +342,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v) vol_sz_frac *= 100; sector_div(vol_sz_frac, ENG_GIG_FACTOR); - if (drv->raid_level > 5) + if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN) drv->raid_level = RAID_UNKNOWN; seq_printf(seq, "cciss/c%dd%d:" "\t%4u.%02uGB\tRAID %s\n", @@ -454,9 +465,19 @@ static void __devinit cciss_procinit(int i) #define to_hba(n) container_of(n, struct ctlr_info, dev) #define to_drv(n) container_of(n, drive_info_struct, dev) -static struct device_type cciss_host_type = { - .name = "cciss_host", -}; +static ssize_t host_store_rescan(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ctlr_info *h = to_hba(dev); + + add_to_scan_list(h); + wake_up_process(cciss_scan_thread); + wait_for_completion_interruptible(&h->scan_wait); + + return count; +} +DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan); static ssize_t dev_show_unique_id(struct device *dev, struct device_attribute *attr, @@ -560,11 +581,101 @@ static ssize_t dev_show_rev(struct device *dev, } DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); +static ssize_t cciss_show_lunid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h 
= to_hba(drv->dev.parent); + unsigned long flags; + unsigned char lunid[8]; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + if (!drv->heads) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -ENOTTY; + } + memcpy(lunid, drv->LunID, sizeof(lunid)); + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + lunid[0], lunid[1], lunid[2], lunid[3], + lunid[4], lunid[5], lunid[6], lunid[7]); +} +DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); + +static ssize_t cciss_show_raid_level(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); + int raid; + unsigned long flags; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + raid = drv->raid_level; + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + if (raid < 0 || raid > RAID_UNKNOWN) + raid = RAID_UNKNOWN; + + return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n", + raid_label[raid]); +} +DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); + +static ssize_t cciss_show_usage_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); + unsigned long flags; + int count; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + count = drv->usage_count; + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return snprintf(buf, 20, "%d\n", count); +} +DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL); + +static struct attribute *cciss_host_attrs[] = { + &dev_attr_rescan.attr, + NULL +}; + +static 
struct attribute_group cciss_host_attr_group = { + .attrs = cciss_host_attrs, +}; + +static const struct attribute_group *cciss_host_attr_groups[] = { + &cciss_host_attr_group, + NULL +}; + +static struct device_type cciss_host_type = { + .name = "cciss_host", + .groups = cciss_host_attr_groups, + .release = cciss_hba_release, +}; + static struct attribute *cciss_dev_attrs[] = { &dev_attr_unique_id.attr, &dev_attr_model.attr, &dev_attr_vendor.attr, &dev_attr_rev.attr, + &dev_attr_lunid.attr, + &dev_attr_raid_level.attr, + &dev_attr_usage_count.attr, NULL }; @@ -580,12 +691,24 @@ static const struct attribute_group *cciss_dev_attr_groups[] = { static struct device_type cciss_dev_type = { .name = "cciss_device", .groups = cciss_dev_attr_groups, + .release = cciss_device_release, }; static struct bus_type cciss_bus_type = { .name = "cciss", }; +/* + * cciss_hba_release is called when the reference count + * of h->dev goes to zero. + */ +static void cciss_hba_release(struct device *dev) +{ + /* + * nothing to do, but need this to avoid a warning + * about not having a release handler from lib/kref.c. + */ +} /* * Initialize sysfs entry for each controller. This sets up and registers @@ -609,6 +732,16 @@ static int cciss_create_hba_sysfs_entry(struct ctlr_info *h) static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h) { device_del(&h->dev); + put_device(&h->dev); /* final put. */ +} + +/* cciss_device_release is called when the reference count + * of h->drv[x]dev goes to zero. + */ +static void cciss_device_release(struct device *dev) +{ + drive_info_struct *drv = to_drv(dev); + kfree(drv); } /* @@ -617,24 +750,39 @@ static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h) * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from * /sys/block/cciss!c#d# to this entry. 
*/ -static int cciss_create_ld_sysfs_entry(struct ctlr_info *h, - drive_info_struct *drv, +static long cciss_create_ld_sysfs_entry(struct ctlr_info *h, int drv_index) { - device_initialize(&drv->dev); - drv->dev.type = &cciss_dev_type; - drv->dev.bus = &cciss_bus_type; - dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index); - drv->dev.parent = &h->dev; - return device_add(&drv->dev); + struct device *dev; + + if (h->drv[drv_index]->device_initialized) + return 0; + + dev = &h->drv[drv_index]->dev; + device_initialize(dev); + dev->type = &cciss_dev_type; + dev->bus = &cciss_bus_type; + dev_set_name(dev, "c%dd%d", h->ctlr, drv_index); + dev->parent = &h->dev; + h->drv[drv_index]->device_initialized = 1; + return device_add(dev); } /* * Remove sysfs entries for a logical drive. */ -static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv) +static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index, + int ctlr_exiting) { - device_del(&drv->dev); + struct device *dev = &h->drv[drv_index]->dev; + + /* special case for c*d0, we only destroy it on controller exit */ + if (drv_index == 0 && !ctlr_exiting) + return; + + device_del(dev); + put_device(dev); /* the "final" put. 
*/ + h->drv[drv_index] = NULL; } /* @@ -751,7 +899,7 @@ static int cciss_open(struct block_device *bdev, fmode_t mode) printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name); #endif /* CCISS_DEBUG */ - if (host->busy_initializing || drv->busy_configuring) + if (drv->busy_configuring) return -EBUSY; /* * Root is allowed to open raw volume zero even if it's not configured @@ -767,7 +915,8 @@ static int cciss_open(struct block_device *bdev, fmode_t mode) if (MINOR(bdev->bd_dev) & 0x0f) { return -ENXIO; /* if it is, make sure we have a LUN ID */ - } else if (drv->LunID == 0) { + } else if (memcmp(drv->LunID, CTLR_LUNID, + sizeof(drv->LunID))) { return -ENXIO; } } @@ -1132,12 +1281,13 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, case CCISS_DEREGDISK: case CCISS_REGNEWD: case CCISS_REVALIDVOLS: - return rebuild_lun_table(host, 0); + return rebuild_lun_table(host, 0, 1); case CCISS_GETLUNINFO:{ LogvolInfo_struct luninfo; - luninfo.LunID = drv->LunID; + memcpy(&luninfo.LunID, drv->LunID, + sizeof(luninfo.LunID)); luninfo.num_opens = drv->usage_count; luninfo.num_parts = 0; if (copy_to_user(argp, &luninfo, @@ -1475,7 +1625,10 @@ static void cciss_check_queues(ctlr_info_t *h) /* make sure the disk has been added and the drive is real * because this can be called from the middle of init_one. 
*/ - if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads)) + if (!h->drv[curr_queue]) + continue; + if (!(h->drv[curr_queue]->queue) || + !(h->drv[curr_queue]->heads)) continue; blk_start_queue(h->gendisk[curr_queue]->queue); @@ -1532,13 +1685,11 @@ static void cciss_softirq_done(struct request *rq) spin_unlock_irqrestore(&h->lock, flags); } -static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[], - uint32_t log_unit) +static inline void log_unit_to_scsi3addr(ctlr_info_t *h, + unsigned char scsi3addr[], uint32_t log_unit) { - log_unit = h->drv[log_unit].LunID & 0x03fff; - memset(&scsi3addr[4], 0, 4); - memcpy(&scsi3addr[0], &log_unit, 4); - scsi3addr[3] |= 0x40; + memcpy(scsi3addr, h->drv[log_unit]->LunID, + sizeof(h->drv[log_unit]->LunID)); } /* This function gets the SCSI vendor, model, and revision of a logical drive @@ -1615,16 +1766,23 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq, return; } -static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, +/* + * cciss_add_disk sets up the block device queue for a logical drive + */ +static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, int drv_index) { disk->queue = blk_init_queue(do_cciss_request, &h->lock); + if (!disk->queue) + goto init_queue_failure; sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index); disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; disk->fops = &cciss_fops; - disk->private_data = &h->drv[drv_index]; - disk->driverfs_dev = &h->drv[drv_index].dev; + if (cciss_create_ld_sysfs_entry(h, drv_index)) + goto cleanup_queue; + disk->private_data = h->drv[drv_index]; + disk->driverfs_dev = &h->drv[drv_index]->dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); @@ -1642,14 +1800,21 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->queue->queuedata = h; blk_queue_logical_block_size(disk->queue, - h->drv[drv_index].block_size); + 
h->drv[drv_index]->block_size); /* Make sure all queue data is written out before */ - /* setting h->drv[drv_index].queue, as setting this */ + /* setting h->drv[drv_index]->queue, as setting this */ /* allows the interrupt handler to start the queue */ wmb(); - h->drv[drv_index].queue = disk->queue; + h->drv[drv_index]->queue = disk->queue; add_disk(disk); + return 0; + +cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; +init_queue_failure: + return -1; } /* This function will check the usage_count of the drive to be updated/added. @@ -1662,7 +1827,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, * is also the controller node. Any changes to disk 0 will show up on * the next reboot. */ -static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) +static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, + int via_ioctl) { ctlr_info_t *h = hba[ctlr]; struct gendisk *disk; @@ -1672,21 +1838,13 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) unsigned long flags = 0; int ret = 0; drive_info_struct *drvinfo; - int was_only_controller_node; /* Get information about the disk and modify the driver structure */ inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL); - drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL); + drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL); if (inq_buff == NULL || drvinfo == NULL) goto mem_msg; - /* See if we're trying to update the "controller node" - * this will happen the when the first logical drive gets - * created by ACU. 
- */ - was_only_controller_node = (drv_index == 0 && - h->drv[0].raid_level == -1); - /* testing to see if 16-byte CDBs are already being used */ if (h->cciss_read == CCISS_READ_16) { cciss_read_capacity_16(h->ctlr, drv_index, 1, @@ -1719,16 +1877,19 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) drvinfo->model, drvinfo->rev); cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no, sizeof(drvinfo->serial_no)); + /* Save the lunid in case we deregister the disk, below. */ + memcpy(drvinfo->LunID, h->drv[drv_index]->LunID, + sizeof(drvinfo->LunID)); /* Is it the same disk we already know, and nothing's changed? */ - if (h->drv[drv_index].raid_level != -1 && + if (h->drv[drv_index]->raid_level != -1 && ((memcmp(drvinfo->serial_no, - h->drv[drv_index].serial_no, 16) == 0) && - drvinfo->block_size == h->drv[drv_index].block_size && - drvinfo->nr_blocks == h->drv[drv_index].nr_blocks && - drvinfo->heads == h->drv[drv_index].heads && - drvinfo->sectors == h->drv[drv_index].sectors && - drvinfo->cylinders == h->drv[drv_index].cylinders)) + h->drv[drv_index]->serial_no, 16) == 0) && + drvinfo->block_size == h->drv[drv_index]->block_size && + drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks && + drvinfo->heads == h->drv[drv_index]->heads && + drvinfo->sectors == h->drv[drv_index]->sectors && + drvinfo->cylinders == h->drv[drv_index]->cylinders)) /* The disk is unchanged, nothing to update */ goto freeret; @@ -1738,18 +1899,17 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) * If the disk already exists then deregister it before proceeding * (unless it's the first disk (for the controller node). 
*/ - if (h->drv[drv_index].raid_level != -1 && drv_index != 0) { + if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) { printk(KERN_WARNING "disk %d has changed.\n", drv_index); spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - h->drv[drv_index].busy_configuring = 1; + h->drv[drv_index]->busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - /* deregister_disk sets h->drv[drv_index].queue = NULL + /* deregister_disk sets h->drv[drv_index]->queue = NULL * which keeps the interrupt handler from starting * the queue. */ - ret = deregister_disk(h, drv_index, 0); - h->drv[drv_index].busy_configuring = 0; + ret = deregister_disk(h, drv_index, 0, via_ioctl); } /* If the disk is in use return */ @@ -1757,22 +1917,31 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) goto freeret; /* Save the new information from cciss_geometry_inquiry - * and serial number inquiry. + * and serial number inquiry. If the disk was deregistered + * above, then h->drv[drv_index] will be NULL. */ - h->drv[drv_index].block_size = drvinfo->block_size; - h->drv[drv_index].nr_blocks = drvinfo->nr_blocks; - h->drv[drv_index].heads = drvinfo->heads; - h->drv[drv_index].sectors = drvinfo->sectors; - h->drv[drv_index].cylinders = drvinfo->cylinders; - h->drv[drv_index].raid_level = drvinfo->raid_level; - memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16); - memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1); - memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1); - memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1); + if (h->drv[drv_index] == NULL) { + drvinfo->device_initialized = 0; + h->drv[drv_index] = drvinfo; + drvinfo = NULL; /* so it won't be freed below. 
*/ + } else { + /* special case for cxd0 */ + h->drv[drv_index]->block_size = drvinfo->block_size; + h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks; + h->drv[drv_index]->heads = drvinfo->heads; + h->drv[drv_index]->sectors = drvinfo->sectors; + h->drv[drv_index]->cylinders = drvinfo->cylinders; + h->drv[drv_index]->raid_level = drvinfo->raid_level; + memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16); + memcpy(h->drv[drv_index]->vendor, drvinfo->vendor, + VENDOR_LEN + 1); + memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1); + memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1); + } ++h->num_luns; disk = h->gendisk[drv_index]; - set_capacity(disk, h->drv[drv_index].nr_blocks); + set_capacity(disk, h->drv[drv_index]->nr_blocks); /* If it's not disk 0 (drv_index != 0) * or if it was disk 0, but there was previously @@ -1780,8 +1949,15 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) * (raid_leve == -1) then we want to update the * logical drive's information. */ - if (drv_index || first_time) - cciss_add_disk(h, disk, drv_index); + if (drv_index || first_time) { + if (cciss_add_disk(h, disk, drv_index) != 0) { + cciss_free_gendisk(h, drv_index); + cciss_free_drive_info(h, drv_index); + printk(KERN_WARNING "cciss:%d could not update " + "disk %d\n", h->ctlr, drv_index); + --h->num_luns; + } + } freeret: kfree(inq_buff); @@ -1793,28 +1969,70 @@ mem_msg: } /* This function will find the first index of the controllers drive array - * that has a -1 for the raid_level and will return that index. This is - * where new drives will be added. If the index to be returned is greater - * than the highest_lun index for the controller then highest_lun is set - * to this new index. If there are no available indexes then -1 is returned. - * "controller_node" is used to know if this is a real logical drive, or just - * the controller node, which determines if this counts towards highest_lun. 
+ * that has a null drv pointer and allocate the drive info struct and + * will return that index This is where new drives will be added. + * If the index to be returned is greater than the highest_lun index for + * the controller then highest_lun is set * to this new index. + * If there are no available indexes or if tha allocation fails, then -1 + * is returned. * "controller_node" is used to know if this is a real + * logical drive, or just the controller node, which determines if this + * counts towards highest_lun. */ -static int cciss_find_free_drive_index(int ctlr, int controller_node) +static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node) { int i; + drive_info_struct *drv; + /* Search for an empty slot for our drive info */ for (i = 0; i < CISS_MAX_LUN; i++) { - if (hba[ctlr]->drv[i].raid_level == -1) { - if (i > hba[ctlr]->highest_lun) - if (!controller_node) - hba[ctlr]->highest_lun = i; + + /* if not cxd0 case, and it's occupied, skip it. */ + if (h->drv[i] && i != 0) + continue; + /* + * If it's cxd0 case, and drv is alloc'ed already, and a + * disk is configured there, skip it. + */ + if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1) + continue; + + /* + * We've found an empty slot. Update highest_lun + * provided this isn't just the fake cxd0 controller node. + */ + if (i > h->highest_lun && !controller_node) + h->highest_lun = i; + + /* If adding a real disk at cxd0, and it's already alloc'ed */ + if (i == 0 && h->drv[i] != NULL) return i; - } + + /* + * Found an empty slot, not already alloc'ed. Allocate it. + * Mark it with raid_level == -1, so we know it's new later on. 
+ */ + drv = kzalloc(sizeof(*drv), GFP_KERNEL); + if (!drv) + return -1; + drv->raid_level = -1; /* so we know it's new */ + h->drv[i] = drv; + return i; } return -1; } +static void cciss_free_drive_info(ctlr_info_t *h, int drv_index) +{ + kfree(h->drv[drv_index]); + h->drv[drv_index] = NULL; +} + +static void cciss_free_gendisk(ctlr_info_t *h, int drv_index) +{ + put_disk(h->gendisk[drv_index]); + h->gendisk[drv_index] = NULL; +} + /* cciss_add_gendisk finds a free hba[]->drv structure * and allocates a gendisk if needed, and sets the lunid * in the drvinfo structure. It returns the index into @@ -1824,13 +2042,15 @@ static int cciss_find_free_drive_index(int ctlr, int controller_node) * a means to talk to the controller in case no logical * drives have yet been configured. */ -static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) +static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[], + int controller_node) { int drv_index; - drv_index = cciss_find_free_drive_index(h->ctlr, controller_node); + drv_index = cciss_alloc_drive_info(h, controller_node); if (drv_index == -1) return -1; + /*Check if the gendisk needs to be allocated */ if (!h->gendisk[drv_index]) { h->gendisk[drv_index] = @@ -1839,23 +2059,24 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) printk(KERN_ERR "cciss%d: could not " "allocate a new disk %d\n", h->ctlr, drv_index); - return -1; + goto err_free_drive_info; } } - h->drv[drv_index].LunID = lunid; - if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index)) + memcpy(h->drv[drv_index]->LunID, lunid, + sizeof(h->drv[drv_index]->LunID)); + if (cciss_create_ld_sysfs_entry(h, drv_index)) goto err_free_disk; - /* Don't need to mark this busy because nobody */ /* else knows about this disk yet to contend */ /* for access to it. 
*/ - h->drv[drv_index].busy_configuring = 0; + h->drv[drv_index]->busy_configuring = 0; wmb(); return drv_index; err_free_disk: - put_disk(h->gendisk[drv_index]); - h->gendisk[drv_index] = NULL; + cciss_free_gendisk(h, drv_index); +err_free_drive_info: + cciss_free_drive_info(h, drv_index); return -1; } @@ -1872,21 +2093,25 @@ static void cciss_add_controller_node(ctlr_info_t *h) if (h->gendisk[0] != NULL) /* already did this? Then bail. */ return; - drv_index = cciss_add_gendisk(h, 0, 1); - if (drv_index == -1) { - printk(KERN_WARNING "cciss%d: could not " - "add disk 0.\n", h->ctlr); - return; - } - h->drv[drv_index].block_size = 512; - h->drv[drv_index].nr_blocks = 0; - h->drv[drv_index].heads = 0; - h->drv[drv_index].sectors = 0; - h->drv[drv_index].cylinders = 0; - h->drv[drv_index].raid_level = -1; - memset(h->drv[drv_index].serial_no, 0, 16); + drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1); + if (drv_index == -1) + goto error; + h->drv[drv_index]->block_size = 512; + h->drv[drv_index]->nr_blocks = 0; + h->drv[drv_index]->heads = 0; + h->drv[drv_index]->sectors = 0; + h->drv[drv_index]->cylinders = 0; + h->drv[drv_index]->raid_level = -1; + memset(h->drv[drv_index]->serial_no, 0, 16); disk = h->gendisk[drv_index]; - cciss_add_disk(h, disk, drv_index); + if (cciss_add_disk(h, disk, drv_index) == 0) + return; + cciss_free_gendisk(h, drv_index); + cciss_free_drive_info(h, drv_index); +error: + printk(KERN_WARNING "cciss%d: could not " + "add disk 0.\n", h->ctlr); + return; } /* This function will add and remove logical drives from the Logical @@ -1897,7 +2122,8 @@ static void cciss_add_controller_node(ctlr_info_t *h) * INPUT * h = The controller to perform the operations on */ -static int rebuild_lun_table(ctlr_info_t *h, int first_time) +static int rebuild_lun_table(ctlr_info_t *h, int first_time, + int via_ioctl) { int ctlr = h->ctlr; int num_luns; @@ -1907,7 +2133,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) int i; int drv_found; int 
drv_index = 0; - __u32 lunid = 0; + unsigned char lunid[8] = CTLR_LUNID; unsigned long flags; if (!capable(CAP_SYS_RAWIO)) @@ -1960,13 +2186,13 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) drv_found = 0; /* skip holes in the array from already deleted drives */ - if (h->drv[i].raid_level == -1) + if (h->drv[i] == NULL) continue; for (j = 0; j < num_luns; j++) { - memcpy(&lunid, &ld_buff->LUN[j][0], 4); - lunid = le32_to_cpu(lunid); - if (h->drv[i].LunID == lunid) { + memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid)); + if (memcmp(h->drv[i]->LunID, lunid, + sizeof(lunid)) == 0) { drv_found = 1; break; } @@ -1974,11 +2200,11 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) if (!drv_found) { /* Deregister it from the OS, it's gone. */ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - h->drv[i].busy_configuring = 1; + h->drv[i]->busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - return_code = deregister_disk(h, i, 1); - cciss_destroy_ld_sysfs_entry(&h->drv[i]); - h->drv[i].busy_configuring = 0; + return_code = deregister_disk(h, i, 1, via_ioctl); + if (h->drv[i] != NULL) + h->drv[i]->busy_configuring = 0; } } @@ -1992,17 +2218,16 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) drv_found = 0; - memcpy(&lunid, &ld_buff->LUN[i][0], 4); - lunid = le32_to_cpu(lunid); - + memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid)); /* Find if the LUN is already in the drive array * of the driver. If so then update its info * if not in use. If it does not exist then find * the first free index and add it. 
*/ for (j = 0; j <= h->highest_lun; j++) { - if (h->drv[j].raid_level != -1 && - h->drv[j].LunID == lunid) { + if (h->drv[j] != NULL && + memcmp(h->drv[j]->LunID, lunid, + sizeof(h->drv[j]->LunID)) == 0) { drv_index = j; drv_found = 1; break; @@ -2015,7 +2240,8 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) if (drv_index == -1) goto freeret; } - cciss_update_drive_info(ctlr, drv_index, first_time); + cciss_update_drive_info(ctlr, drv_index, first_time, + via_ioctl); } /* end for */ freeret: @@ -2032,6 +2258,25 @@ mem_msg: goto freeret; } +static void cciss_clear_drive_info(drive_info_struct *drive_info) +{ + /* zero out the disk size info */ + drive_info->nr_blocks = 0; + drive_info->block_size = 0; + drive_info->heads = 0; + drive_info->sectors = 0; + drive_info->cylinders = 0; + drive_info->raid_level = -1; + memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no)); + memset(drive_info->model, 0, sizeof(drive_info->model)); + memset(drive_info->rev, 0, sizeof(drive_info->rev)); + memset(drive_info->vendor, 0, sizeof(drive_info->vendor)); + /* + * don't clear the LUNID though, we need to remember which + * one this one is. + */ +} + /* This function will deregister the disk and it's queue from the * kernel. It must be called with the controller lock held and the * drv structures busy_configuring flag set. It's parameters are: @@ -2046,43 +2291,48 @@ mem_msg: * the disk in preparation for re-adding it. In this case * the highest_lun should be left unchanged and the LunID * should not be cleared. + * via_ioctl + * This indicates whether we've reached this path via ioctl. + * This affects the maximum usage count allowed for c0d0 to be messed with. + * If this path is reached via ioctl(), then the max_usage_count will + * be 1, as the process calling ioctl() has got to have the device open. + * If we get here via sysfs, then the max usage count will be zero. 
*/ static int deregister_disk(ctlr_info_t *h, int drv_index, - int clear_all) + int clear_all, int via_ioctl) { int i; struct gendisk *disk; drive_info_struct *drv; + int recalculate_highest_lun; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - drv = &h->drv[drv_index]; + drv = h->drv[drv_index]; disk = h->gendisk[drv_index]; /* make sure logical volume is NOT is use */ if (clear_all || (h->gendisk[0] == disk)) { - if (drv->usage_count > 1) + if (drv->usage_count > via_ioctl) return -EBUSY; } else if (drv->usage_count > 0) return -EBUSY; + recalculate_highest_lun = (drv == h->drv[h->highest_lun]); + /* invalidate the devices and deregister the disk. If it is disk * zero do not deregister it but just zero out it's values. This * allows us to delete disk zero but keep the controller registered. */ if (h->gendisk[0] != disk) { struct request_queue *q = disk->queue; - if (disk->flags & GENHD_FL_UP) + if (disk->flags & GENHD_FL_UP) { + cciss_destroy_ld_sysfs_entry(h, drv_index, 0); del_gendisk(disk); - if (q) { - blk_cleanup_queue(q); - /* Set drv->queue to NULL so that we do not try - * to call blk_start_queue on this queue in the - * interrupt handler - */ - drv->queue = NULL; } + if (q) + blk_cleanup_queue(q); /* If clear_all is set then we are deleting the logical * drive, not just refreshing its info. For drives * other than disk 0 we will call put_disk. We do not @@ -2105,34 +2355,20 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, } } else { set_capacity(disk, 0); + cciss_clear_drive_info(drv); } --h->num_luns; - /* zero out the disk size info */ - drv->nr_blocks = 0; - drv->block_size = 0; - drv->heads = 0; - drv->sectors = 0; - drv->cylinders = 0; - drv->raid_level = -1; /* This can be used as a flag variable to - * indicate that this element of the drive - * array is free. 
- */ - - if (clear_all) { - /* check to see if it was the last disk */ - if (drv == h->drv + h->highest_lun) { - /* if so, find the new hightest lun */ - int i, newhighest = -1; - for (i = 0; i <= h->highest_lun; i++) { - /* if the disk has size > 0, it is available */ - if (h->drv[i].heads) - newhighest = i; - } - h->highest_lun = newhighest; - } - drv->LunID = 0; + /* if it was the last disk, find the new hightest lun */ + if (clear_all && recalculate_highest_lun) { + int i, newhighest = -1; + for (i = 0; i <= h->highest_lun; i++) { + /* if the disk has size > 0, it is available */ + if (h->drv[i] && h->drv[i]->heads) + newhighest = i; + } + h->highest_lun = newhighest; } return 0; } @@ -2479,8 +2715,6 @@ static void cciss_geometry_inquiry(int ctlr, int logvol, } else { /* Get geometry failed */ printk(KERN_WARNING "cciss: reading geometry failed\n"); } - printk(KERN_INFO " heads=%d, sectors=%d, cylinders=%d\n\n", - drv->heads, drv->sectors, drv->cylinders); } static void @@ -2514,9 +2748,6 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size, *total_size = 0; *block_size = BLOCK_SIZE; } - if (*total_size != 0) - printk(KERN_INFO " blocks= %llu block_size= %d\n", - (unsigned long long)*total_size+1, *block_size); kfree(buf); } @@ -2568,7 +2799,8 @@ static int cciss_revalidate(struct gendisk *disk) InquiryData_struct *inq_buff = NULL; for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) { - if (h->drv[logvol].LunID == drv->LunID) { + if (memcmp(h->drv[logvol]->LunID, drv->LunID, + sizeof(drv->LunID)) == 0) { FOUND = 1; break; } @@ -3053,8 +3285,7 @@ static void do_cciss_request(struct request_queue *q) /* The first 2 bits are reserved for controller error reporting. */ c->Header.Tag.lower = (c->cmdindex << 3); c->Header.Tag.lower |= 0x04; /* flag for direct lookup. 
*/ - c->Header.LUN.LogDev.VolId = drv->LunID; - c->Header.LUN.LogDev.Mode = 1; + memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID)); c->Request.CDBLen = 10; // 12 byte commands not in FW yet; c->Request.Type.Type = TYPE_CMD; // It is a command. c->Request.Type.Attribute = ATTR_SIMPLE; @@ -3232,20 +3463,121 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) return IRQ_HANDLED; } +/** + * add_to_scan_list() - add controller to rescan queue + * @h: Pointer to the controller. + * + * Adds the controller to the rescan queue if not already on the queue. + * + * returns 1 if added to the queue, 0 if skipped (could be on the + * queue already, or the controller could be initializing or shutting + * down). + **/ +static int add_to_scan_list(struct ctlr_info *h) +{ + struct ctlr_info *test_h; + int found = 0; + int ret = 0; + + if (h->busy_initializing) + return 0; + + if (!mutex_trylock(&h->busy_shutting_down)) + return 0; + + mutex_lock(&scan_mutex); + list_for_each_entry(test_h, &scan_q, scan_list) { + if (test_h == h) { + found = 1; + break; + } + } + if (!found && !h->busy_scanning) { + INIT_COMPLETION(h->scan_wait); + list_add_tail(&h->scan_list, &scan_q); + ret = 1; + } + mutex_unlock(&scan_mutex); + mutex_unlock(&h->busy_shutting_down); + + return ret; +} + +/** + * remove_from_scan_list() - remove controller from rescan queue + * @h: Pointer to the controller. + * + * Removes the controller from the rescan queue if present. Blocks if + * the controller is currently conducting a rescan. 
+ **/ +static void remove_from_scan_list(struct ctlr_info *h) +{ + struct ctlr_info *test_h, *tmp_h; + int scanning = 0; + + mutex_lock(&scan_mutex); + list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) { + if (test_h == h) { + list_del(&h->scan_list); + complete_all(&h->scan_wait); + mutex_unlock(&scan_mutex); + return; + } + } + if (&h->busy_scanning) + scanning = 0; + mutex_unlock(&scan_mutex); + + if (scanning) + wait_for_completion(&h->scan_wait); +} + +/** + * scan_thread() - kernel thread used to rescan controllers + * @data: Ignored. + * + * A kernel thread used scan for drive topology changes on + * controllers. The thread processes only one controller at a time + * using a queue. Controllers are added to the queue using + * add_to_scan_list() and removed from the queue either after done + * processing or using remove_from_scan_list(). + * + * returns 0. + **/ static int scan_thread(void *data) { - ctlr_info_t *h = data; - int rc; - DECLARE_COMPLETION_ONSTACK(wait); - h->rescan_wait = &wait; + struct ctlr_info *h; - for (;;) { - rc = wait_for_completion_interruptible(&wait); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); if (kthread_should_stop()) break; - if (!rc) - rebuild_lun_table(h, 0); + + while (1) { + mutex_lock(&scan_mutex); + if (list_empty(&scan_q)) { + mutex_unlock(&scan_mutex); + break; + } + + h = list_entry(scan_q.next, + struct ctlr_info, + scan_list); + list_del(&h->scan_list); + h->busy_scanning = 1; + mutex_unlock(&scan_mutex); + + if (h) { + rebuild_lun_table(h, 0, 0); + complete_all(&h->scan_wait); + mutex_lock(&scan_mutex); + h->busy_scanning = 0; + mutex_unlock(&scan_mutex); + } + } } + return 0; } @@ -3268,8 +3600,8 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c) case REPORT_LUNS_CHANGED: printk(KERN_WARNING "cciss%d: report LUN data " "changed\n", h->ctlr); - if (h->rescan_wait) - complete(h->rescan_wait); + add_to_scan_list(h); + wake_up_process(cciss_scan_thread); 
return 1; break; case POWER_OR_RESET: @@ -3489,7 +3821,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) if (scratchpad == CCISS_FIRMWARE_READY) break; set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); /* wait 100ms */ + schedule_timeout(msecs_to_jiffies(100)); /* wait 100ms */ } if (scratchpad != CCISS_FIRMWARE_READY) { printk(KERN_WARNING "cciss: Board not ready. Timed out.\n"); @@ -3615,7 +3947,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) break; /* delay and try again */ set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout(msecs_to_jiffies(1)); } #ifdef CCISS_DEBUG @@ -3669,15 +4001,16 @@ Enomem: return -1; } -static void free_hba(int i) +static void free_hba(int n) { - ctlr_info_t *p = hba[i]; - int n; + ctlr_info_t *h = hba[n]; + int i; - hba[i] = NULL; - for (n = 0; n < CISS_MAX_LUN; n++) - put_disk(p->gendisk[n]); - kfree(p); + hba[n] = NULL; + for (i = 0; i < h->highest_lun + 1; i++) + if (h->gendisk[i] != NULL) + put_disk(h->gendisk[i]); + kfree(h); } /* Send a message CDB to the firmware. 
*/ @@ -3918,6 +4251,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->busy_initializing = 1; INIT_HLIST_HEAD(&hba[i]->cmpQ); INIT_HLIST_HEAD(&hba[i]->reqQ); + mutex_init(&hba[i]->busy_shutting_down); if (cciss_pci_init(hba[i], pdev) != 0) goto clean0; @@ -3926,6 +4260,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->ctlr = i; hba[i]->pdev = pdev; + init_completion(&hba[i]->scan_wait); + if (cciss_create_hba_sysfs_entry(hba[i])) goto clean0; @@ -4001,8 +4337,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->num_luns = 0; hba[i]->highest_lun = -1; for (j = 0; j < CISS_MAX_LUN; j++) { - hba[i]->drv[j].raid_level = -1; - hba[i]->drv[j].queue = NULL; + hba[i]->drv[j] = NULL; hba[i]->gendisk[j] = NULL; } @@ -4035,14 +4370,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->cciss_max_sectors = 2048; + rebuild_lun_table(hba[i], 1, 0); hba[i]->busy_initializing = 0; - - rebuild_lun_table(hba[i], 1); - hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i], - "cciss_scan%02d", i); - if (IS_ERR(hba[i]->cciss_scan_thread)) - return PTR_ERR(hba[i]->cciss_scan_thread); - return 1; clean4: @@ -4063,12 +4392,7 @@ clean1: cciss_destroy_hba_sysfs_entry(hba[i]); clean0: hba[i]->busy_initializing = 0; - /* cleanup any queues that may have been initialized */ - for (j=0; j <= hba[i]->highest_lun; j++){ - drive_info_struct *drv = &(hba[i]->drv[j]); - if (drv->queue) - blk_cleanup_queue(drv->queue); - } + /* * Deliberately omit pci_disable_device(): it does something nasty to * Smart Array controllers that pci_enable_device does not undo @@ -4125,8 +4449,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) return; } - kthread_stop(hba[i]->cciss_scan_thread); + mutex_lock(&hba[i]->busy_shutting_down); + remove_from_scan_list(hba[i]); remove_proc_entry(hba[i]->devname, proc_cciss); unregister_blkdev(hba[i]->major, hba[i]->devname); @@ -4136,8 +4461,10 @@ static void __devexit 
cciss_remove_one(struct pci_dev *pdev) if (disk) { struct request_queue *q = disk->queue; - if (disk->flags & GENHD_FL_UP) + if (disk->flags & GENHD_FL_UP) { + cciss_destroy_ld_sysfs_entry(hba[i], j, 1); del_gendisk(disk); + } if (q) blk_cleanup_queue(q); } @@ -4170,6 +4497,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); cciss_destroy_hba_sysfs_entry(hba[i]); + mutex_unlock(&hba[i]->busy_shutting_down); free_hba(i); } @@ -4202,15 +4530,25 @@ static int __init cciss_init(void) if (err) return err; + /* Start the scan thread */ + cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan"); + if (IS_ERR(cciss_scan_thread)) { + err = PTR_ERR(cciss_scan_thread); + goto err_bus_unregister; + } + /* Register for our PCI devices */ err = pci_register_driver(&cciss_pci_driver); if (err) - goto err_bus_register; + goto err_thread_stop; - return 0; + return err; -err_bus_register: +err_thread_stop: + kthread_stop(cciss_scan_thread); +err_bus_unregister: bus_unregister(&cciss_bus_type); + return err; } @@ -4227,6 +4565,7 @@ static void __exit cciss_cleanup(void) cciss_remove_one(hba[i]->pdev); } } + kthread_stop(cciss_scan_thread); remove_proc_entry("driver/cciss", NULL); bus_unregister(&cciss_bus_type); } diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 06a5db25b298..31524cf42c77 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -2,6 +2,7 @@ #define CCISS_H #include <linux/genhd.h> +#include <linux/mutex.h> #include "cciss_cmd.h" @@ -29,7 +30,7 @@ struct access_method { }; typedef struct _drive_info_struct { - __u32 LunID; + unsigned char LunID[8]; int usage_count; struct request_queue *queue; sector_t nr_blocks; @@ -51,6 +52,7 @@ typedef struct _drive_info_struct char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */ char model[MODEL_LEN + 1]; /* SCSI model string */ char rev[REV_LEN + 1]; /* SCSI revision string */ + char device_initialized; /* indicates whether dev 
is initialized */ } drive_info_struct; struct ctlr_info @@ -86,7 +88,7 @@ struct ctlr_info BYTE cciss_read_capacity; // information about each logical volume - drive_info_struct drv[CISS_MAX_LUN]; + drive_info_struct *drv[CISS_MAX_LUN]; struct access_method access; @@ -108,6 +110,8 @@ struct ctlr_info int nr_frees; int busy_configuring; int busy_initializing; + int busy_scanning; + struct mutex busy_shutting_down; /* This element holds the zero based queue number of the last * queue to be started. It is used for fairness. @@ -122,8 +126,8 @@ struct ctlr_info /* and saved for later processing */ #endif unsigned char alive; - struct completion *rescan_wait; - struct task_struct *cciss_scan_thread; + struct list_head scan_list; + struct completion scan_wait; struct device dev; }; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index b82d438e2607..6422651ec364 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -32,6 +32,7 @@ #include <linux/blkpg.h> #include <linux/timer.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/hdreg.h> #include <linux/spinlock.h> @@ -177,7 +178,6 @@ static int cpqarray_register_ctlr(int ctlr, struct pci_dev *pdev); #ifdef CONFIG_PROC_FS static void ida_procinit(int i); -static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data); #else static void ida_procinit(int i) {} #endif @@ -206,6 +206,7 @@ static const struct block_device_operations ida_fops = { #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_array; +static const struct file_operations ida_proc_fops; /* * Get us a file in /proc/array that says something about each controller. 
@@ -218,19 +219,16 @@ static void __init ida_procinit(int i) if (!proc_array) return; } - create_proc_read_entry(hba[i]->devname, 0, proc_array, - ida_proc_get_info, hba[i]); + proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]); } /* * Report information about this controller. */ -static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static int ida_proc_show(struct seq_file *m, void *v) { - off_t pos = 0; - off_t len = 0; - int size, i, ctlr; - ctlr_info_t *h = (ctlr_info_t*)data; + int i, ctlr; + ctlr_info_t *h = (ctlr_info_t*)m->private; drv_info_t *drv; #ifdef CPQ_PROC_PRINT_QUEUES cmdlist_t *c; @@ -238,7 +236,7 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt #endif ctlr = h->ctlr; - size = sprintf(buffer, "%s: Compaq %s Controller\n" + seq_printf(m, "%s: Compaq %s Controller\n" " Board ID: 0x%08lx\n" " Firmware Revision: %c%c%c%c\n" " Controller Sig: 0x%08lx\n" @@ -258,55 +256,54 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt h->log_drives, h->phys_drives, h->Qdepth, h->maxQsinceinit); - pos += size; len += size; - - size = sprintf(buffer+len, "Logical Drive Info:\n"); - pos += size; len += size; + seq_puts(m, "Logical Drive Info:\n"); for(i=0; i<h->log_drives; i++) { drv = &h->drv[i]; - size = sprintf(buffer+len, "ida/c%dd%d: blksz=%d nr_blks=%d\n", + seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n", ctlr, i, drv->blk_size, drv->nr_blks); - pos += size; len += size; } #ifdef CPQ_PROC_PRINT_QUEUES spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); - size = sprintf(buffer+len, "\nCurrent Queues:\n"); - pos += size; len += size; + seq_puts(m, "\nCurrent Queues:\n"); c = h->reqQ; - size = sprintf(buffer+len, "reqQ = %p", c); pos += size; len += size; + seq_printf(m, "reqQ = %p", c); if (c) c=c->next; while(c && c != h->reqQ) { - size = sprintf(buffer+len, "->%p", c); - pos += size; len += size; + seq_printf(m, "->%p", c); 
c=c->next; } c = h->cmpQ; - size = sprintf(buffer+len, "\ncmpQ = %p", c); pos += size; len += size; + seq_printf(m, "\ncmpQ = %p", c); if (c) c=c->next; while(c && c != h->cmpQ) { - size = sprintf(buffer+len, "->%p", c); - pos += size; len += size; + seq_printf(m, "->%p", c); c=c->next; } - size = sprintf(buffer+len, "\n"); pos += size; len += size; + seq_putc(m, '\n'); spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); #endif - size = sprintf(buffer+len, "nr_allocs = %d\nnr_frees = %d\n", + seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n", h->nr_allocs, h->nr_frees); - pos += size; len += size; - - *eof = 1; - *start = buffer+offset; - len -= offset; - if (len>length) - len = length; - return len; + return 0; +} + +static int ida_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ida_proc_show, PDE(inode)->data); } + +static const struct file_operations ida_proc_fops = { + .owner = THIS_MODULE, + .open = ida_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ module_param_array(eisa, int, NULL, 0); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 376f1ab48a24..23e76fe0d359 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -130,7 +130,7 @@ struct mapped_device { /* * A list of ios that arrived while we were suspended. 
*/ - atomic_t pending[2]; + atomic_t pending; wait_queue_head_t wait; struct work_struct work; struct bio_list deferred; @@ -453,14 +453,13 @@ static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; int cpu; - int rw = bio_data_dir(io->bio); io->start_time = jiffies; cpu = part_stat_lock(); part_round_stats(cpu, &dm_disk(md)->part0); part_stat_unlock(); - dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static void end_io_acct(struct dm_io *io) @@ -480,9 +479,8 @@ static void end_io_acct(struct dm_io *io) * After this is decremented the bio must not be touched if it is * a barrier. */ - dm_disk(md)->part0.in_flight[rw] = pending = - atomic_dec_return(&md->pending[rw]); - pending += atomic_read(&md->pending[rw^0x1]); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); /* nudge anyone waiting on suspend queue */ if (!pending) @@ -1787,8 +1785,7 @@ static struct mapped_device *alloc_dev(int minor) if (!md->disk) goto bad_disk; - atomic_set(&md->pending[0], 0); - atomic_set(&md->pending[1], 0); + atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); @@ -2091,8 +2088,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) break; } spin_unlock_irqrestore(q->queue_lock, flags); - } else if (!atomic_read(&md->pending[0]) && - !atomic_read(&md->pending[1])) + } else if (!atomic_read(&md->pending)) break; if (interruptible == TASK_INTERRUPTIBLE && diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0acbf4f5be50..8ca17a3e96ea 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,14 +32,6 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; -static int blktrans_discard_request(struct request_queue *q, - struct request *req) -{ - req->cmd_type = REQ_TYPE_LINUX_BLOCK; - req->cmd[0] = 
REQ_LB_OP_DISCARD; - return 0; -} - static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; - if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && - req->cmd[0] == REQ_LB_OP_DISCARD) - return tr->discard(dev, block, nsect); - if (!blk_fs_request(req)) return -EIO; @@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, get_capacity(req->rq_disk)) return -EIO; + if (blk_discard_rq(req)) + return tr->discard(dev, block, nsect); + switch(rq_data_dir(req)) { case READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) @@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); if (tr->discard) - blk_queue_set_discard(tr->blkcore_priv->rq, - blktrans_discard_request); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + tr->blkcore_priv->rq); tr->blkshift = ffs(tr->blksize) - 1; diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index ee1601026fb0..c24e4e0367a2 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio) struct dst_node *n = q->queuedata; int err = -EIO; - if (bio_empty_barrier(bio) && !q->prepare_discard_fn) { + if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { /* * This is a dirty^Wnice hack, but if we complete this * operation with -EOPNOTSUPP like intended, XFS @@ -249,6 +249,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) mempool_free(p, bs->bio_pool); } +EXPORT_SYMBOL(bio_free); void bio_init(struct bio *bio) { @@ -257,6 +258,7 @@ void bio_init(struct bio *bio) bio->bi_comp_cpu = -1; atomic_set(&bio->bi_cnt, 1); } +EXPORT_SYMBOL(bio_init); /** * bio_alloc_bioset - allocate a bio for I/O @@ -311,6 +313,7 @@ err_free: mempool_free(p, 
bs->bio_pool); return NULL; } +EXPORT_SYMBOL(bio_alloc_bioset); static void bio_fs_destructor(struct bio *bio) { @@ -337,6 +340,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +EXPORT_SYMBOL(bio_alloc); static void bio_kmalloc_destructor(struct bio *bio) { @@ -380,6 +384,7 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +EXPORT_SYMBOL(bio_kmalloc); void zero_fill_bio(struct bio *bio) { @@ -416,6 +421,7 @@ void bio_put(struct bio *bio) bio->bi_destructor(bio); } } +EXPORT_SYMBOL(bio_put); inline int bio_phys_segments(struct request_queue *q, struct bio *bio) { @@ -424,6 +430,7 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) return bio->bi_phys_segments; } +EXPORT_SYMBOL(bio_phys_segments); /** * __bio_clone - clone a bio @@ -451,6 +458,7 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) bio->bi_size = bio_src->bi_size; bio->bi_idx = bio_src->bi_idx; } +EXPORT_SYMBOL(__bio_clone); /** * bio_clone - clone a bio @@ -482,6 +490,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) return b; } +EXPORT_SYMBOL(bio_clone); /** * bio_get_nr_vecs - return approx number of vecs @@ -505,6 +514,7 @@ int bio_get_nr_vecs(struct block_device *bdev) return nr_pages; } +EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, @@ -635,6 +645,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, return __bio_add_page(q, bio, page, len, offset, queue_max_hw_sectors(q)); } +EXPORT_SYMBOL(bio_add_pc_page); /** * bio_add_page - attempt to add page to bio @@ -655,6 +666,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct request_queue *q = bdev_get_queue(bio->bi_bdev); return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q)); } +EXPORT_SYMBOL(bio_add_page); struct bio_map_data { struct bio_vec *iovecs; @@ -776,6 +788,7 @@ int 
bio_uncopy_user(struct bio *bio) bio_put(bio); return ret; } +EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio @@ -920,6 +933,7 @@ struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); } +EXPORT_SYMBOL(bio_copy_user); static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, @@ -1050,6 +1064,7 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); } +EXPORT_SYMBOL(bio_map_user); /** * bio_map_user_iov - map user sg_iovec table into bio @@ -1117,13 +1132,13 @@ void bio_unmap_user(struct bio *bio) __bio_unmap_user(bio); bio_put(bio); } +EXPORT_SYMBOL(bio_unmap_user); static void bio_map_kern_endio(struct bio *bio, int err) { bio_put(bio); } - static struct bio *__bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) { @@ -1189,6 +1204,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, bio_put(bio); return ERR_PTR(-EINVAL); } +EXPORT_SYMBOL(bio_map_kern); static void bio_copy_kern_endio(struct bio *bio, int err) { @@ -1250,6 +1266,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, return bio; } +EXPORT_SYMBOL(bio_copy_kern); /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions @@ -1400,6 +1417,7 @@ void bio_endio(struct bio *bio, int error) if (bio->bi_end_io) bio->bi_end_io(bio, error); } +EXPORT_SYMBOL(bio_endio); void bio_pair_release(struct bio_pair *bp) { @@ -1410,6 +1428,7 @@ void bio_pair_release(struct bio_pair *bp) mempool_free(bp, bp->bio2.bi_private); } } +EXPORT_SYMBOL(bio_pair_release); static void bio_pair_end_1(struct bio *bi, int err) { @@ -1477,6 +1496,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) return bp; } +EXPORT_SYMBOL(bio_split); /** * bio_sector_offset - 
Find hardware sector offset in bio @@ -1547,6 +1567,7 @@ void bioset_free(struct bio_set *bs) kfree(bs); } +EXPORT_SYMBOL(bioset_free); /** * bioset_create - Create a bio_set @@ -1592,6 +1613,7 @@ bad: bioset_free(bs); return NULL; } +EXPORT_SYMBOL(bioset_create); static void __init biovec_init_slabs(void) { @@ -1636,29 +1658,4 @@ static int __init init_bio(void) return 0; } - subsys_initcall(init_bio); - -EXPORT_SYMBOL(bio_alloc); -EXPORT_SYMBOL(bio_kmalloc); -EXPORT_SYMBOL(bio_put); -EXPORT_SYMBOL(bio_free); -EXPORT_SYMBOL(bio_endio); -EXPORT_SYMBOL(bio_init); -EXPORT_SYMBOL(__bio_clone); -EXPORT_SYMBOL(bio_clone); -EXPORT_SYMBOL(bio_phys_segments); -EXPORT_SYMBOL(bio_add_page); -EXPORT_SYMBOL(bio_add_pc_page); -EXPORT_SYMBOL(bio_get_nr_vecs); -EXPORT_SYMBOL(bio_map_user); -EXPORT_SYMBOL(bio_unmap_user); -EXPORT_SYMBOL(bio_map_kern); -EXPORT_SYMBOL(bio_copy_kern); -EXPORT_SYMBOL(bio_pair_release); -EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_copy_user); -EXPORT_SYMBOL(bio_uncopy_user); -EXPORT_SYMBOL(bioset_create); -EXPORT_SYMBOL(bioset_free); -EXPORT_SYMBOL(bio_alloc_bioset); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7b685e10cbad..f38fee0311a7 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -248,19 +248,11 @@ ssize_t part_stat_show(struct device *dev, part_stat_read(p, merges[WRITE]), (unsigned long long)part_stat_read(p, sectors[WRITE]), jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), - part_in_flight(p), + p->in_flight, jiffies_to_msecs(part_stat_read(p, io_ticks)), jiffies_to_msecs(part_stat_read(p, time_in_queue))); } -ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); -} - #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -289,7 +281,6 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, 
NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -301,7 +292,6 @@ static struct attribute *part_attrs[] = { &dev_attr_size.attr, &dev_attr_alignment_offset.attr, &dev_attr_stat.attr, - &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5ac..25119041e034 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,7 +82,6 @@ enum rq_cmd_type_bits { enum { REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush request */ - REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); -typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -313,6 +311,7 @@ struct queue_limits { unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; + unsigned int max_discard_sectors; unsigned short logical_block_size; unsigned short max_hw_segments; @@ -340,7 +339,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; - prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -460,6 +458,7 @@ struct request_queue #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 
15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ +#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -591,6 +590,7 @@ enum { #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) +#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -929,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); +extern void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, @@ -955,7 +957,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); @@ -1080,25 +1081,37 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return 
q->limits.physical_block_size; } +static inline int bdev_physical_block_size(struct block_device *bdev) +{ + return queue_physical_block_size(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_min(struct request_queue *q) { return q->limits.io_min; } +static inline int bdev_io_min(struct block_device *bdev) +{ + return queue_io_min(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_opt(struct request_queue *q) { return q->limits.io_opt; } +static inline int bdev_io_opt(struct block_device *bdev) +{ + return queue_io_opt(bdev_get_queue(bdev)); +} + static inline int queue_alignment_offset(struct request_queue *q) { - if (q && q->limits.misaligned) + if (q->limits.misaligned) return -1; - if (q && q->limits.alignment_offset) - return q->limits.alignment_offset; - - return 0; + return q->limits.alignment_offset; } static inline int queue_sector_alignment_offset(struct request_queue *q, @@ -1108,6 +1121,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q, & (q->limits.io_min - 1); } +static inline int bdev_alignment_offset(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q->limits.misaligned) + return -1; + + if (bdev != bdev->bd_contains) + return bdev->bd_part->alignment_offset; + + return q->limits.alignment_offset; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? 
q->dma_alignment : 511; @@ -1146,7 +1172,11 @@ static inline void put_dev_sector(Sector p) } struct work_struct; +struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7e4350ece0f8..3b73b9992b26 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern void blk_trace_remove_sysfs(struct device *dev); extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -211,6 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) 
do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index a1e6899d4b6c..2620a8c63571 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -300,6 +300,10 @@ struct inodes_stat_t { #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) #define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 297df45ffd0a..7beaa21b3880 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight[2]; + int in_flight; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,23 +322,18 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part, int rw) +static inline void part_inc_in_flight(struct hd_struct *part) { - part->in_flight[rw]++; + part->in_flight++; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]++; + part_to_disk(part)->part0.in_flight++; } -static inline void part_dec_in_flight(struct hd_struct *part, int rw) +static inline void part_dec_in_flight(struct hd_struct *part) { - part->in_flight[rw]--; + part->in_flight--; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]--; -} - -static inline int part_in_flight(struct hd_struct *part) -{ - return part->in_flight[0] + part->in_flight[1]; + part_to_disk(part)->part0.in_flight--; } /* block/blk-core.c */ @@ -551,8 +546,6 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, 
char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d86af94691c2..00405b5f624a 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -488,6 +488,39 @@ TRACE_EVENT(block_remap, (unsigned long long)__entry->old_sector) ); +TRACE_EVENT(block_rq_remap, + + TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, + sector_t from), + + TP_ARGS(q, rq, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(rq->rq_disk); + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = blk_rq_sectors(rq); + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs_rq(__entry->rwbs, rq); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3eb159c277c8..d9d6206e0b14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -856,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, } /** + * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @q: queue the io is for + * @rq: the source request + * @dev: target device + * @from: source sector + * + * 
Description: + * Device mapper remaps request to other devices. + * Add a trace for that action. + * + **/ +static void blk_add_trace_rq_remap(struct request_queue *q, + struct request *rq, dev_t dev, + sector_t from) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.sector_from = cpu_to_be64(from); + + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, + sizeof(r), &r); +} + +/** * blk_add_driver_data - Add binary message with driver-specific data * @q: queue the io is for * @rq: io request @@ -922,10 +953,13 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + WARN_ON(ret); } static void blk_unregister_tracepoints(void) { + unregister_trace_block_rq_remap(blk_add_trace_rq_remap); unregister_trace_block_remap(blk_add_trace_remap); unregister_trace_block_split(blk_add_trace_split); unregister_trace_block_unplug_io(blk_add_trace_unplug_io); @@ -1657,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev) return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } +void blk_trace_remove_sysfs(struct device *dev) +{ + sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING diff --git a/mm/swapfile.c b/mm/swapfile.c index 4de7f02f820b..a1bc6b9af9a2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1974,12 +1974,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap; } - if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { - p->flags |= SWP_SOLIDSTATE; - p->cluster_next = 1 + (random32() % p->highest_bit); + if (p->bdev) { + if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { + p->flags |= SWP_SOLIDSTATE; + p->cluster_next = 1 + 
(random32() % p->highest_bit); + } + if (discard_swap(p) == 0) + p->flags |= SWP_DISCARDABLE; } - if (discard_swap(p) == 0) - p->flags |= SWP_DISCARDABLE; mutex_lock(&swapon_mutex); spin_lock(&swap_lock); |