diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-07 08:38:33 +1100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-07 08:38:33 +1100 |
commit | 2110cf029a67237db572299bb51e0de9e3e3d4dd (patch) | |
tree | 6837ff139a807037f06952c3eb72c3e350bc05c8 | |
parent | 1589a3e7777631ff56dd58cd7dcdf275185e62b5 (diff) | |
parent | 1383923d1985cef2bceb8128094fbe5e05de7435 (diff) | |
download | lwn-2110cf029a67237db572299bb51e0de9e3e3d4dd.tar.gz lwn-2110cf029a67237db572299bb51e0de9e3e3d4dd.zip |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"I've got a few bits pending for 3.8 final, that I better get sent out.
It's all been sitting for a while, I consider it safe.
It contains:
- Two bug fixes for mtip32xx, fixing a driver hang and a crash.
- A few-liner protocol error fix for drbd.
- A few fixes for the xen block front/back driver, fixing a potential
data corruption issue.
- A race fix for disk_clear_events(), causing spurious warnings. Out
of the Chrome OS base.
- A deadlock fix for disk_clear_events(), moving it to the a
unfreezable workqueue. Also from the Chrome OS base."
* 'for-linus' of git://git.kernel.dk/linux-block:
drbd: fix potential protocol error and resulting disconnect/reconnect
mtip32xx: fix for crash when the device surprise removed during rebuild
mtip32xx: fix for driver hang after a command timeout
block: prevent race/cleanup
block: remove deadlock in disk_clear_events
xen-blkfront: handle bvecs with partial data
llist/xen-blkfront: implement safe version of llist_for_each_entry
xen-blkback: implement safe iterator for the list of persistent grants
-rw-r--r-- | block/genhd.c | 42 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 7 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 24 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 18 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 10 | ||||
-rw-r--r-- | include/linux/llist.h | 25 |
8 files changed, 101 insertions, 28 deletions
diff --git a/block/genhd.c b/block/genhd.c index 9a289d7c84bb..3993ebf4135f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -35,6 +35,8 @@ static DEFINE_IDR(ext_devt_idr); static struct device_type disk_type; +static void disk_check_events(struct disk_events *ev, + unsigned int *clearing_ptr); static void disk_alloc_events(struct gendisk *disk); static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); @@ -1549,6 +1551,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) const struct block_device_operations *bdops = disk->fops; struct disk_events *ev = disk->ev; unsigned int pending; + unsigned int clearing = mask; if (!ev) { /* for drivers still using the old ->media_changed method */ @@ -1558,34 +1561,53 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) return 0; } - /* tell the workfn about the events being cleared */ + disk_block_events(disk); + + /* + * store the union of mask and ev->clearing on the stack so that the + * race with disk_flush_events does not cause ambiguity (ev->clearing + * can still be modified even if events are blocked). + */ spin_lock_irq(&ev->lock); - ev->clearing |= mask; + clearing |= ev->clearing; + ev->clearing = 0; spin_unlock_irq(&ev->lock); - /* uncondtionally schedule event check and wait for it to finish */ - disk_block_events(disk); - queue_delayed_work(system_freezable_wq, &ev->dwork, 0); - flush_delayed_work(&ev->dwork); - __disk_unblock_events(disk, false); + disk_check_events(ev, &clearing); + /* + * if ev->clearing is not 0, the disk_flush_events got called in the + * middle of this function, so we want to run the workfn without delay. + */ + __disk_unblock_events(disk, ev->clearing ? true : false); /* then, fetch and clear pending events */ spin_lock_irq(&ev->lock); - WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ pending = ev->pending & mask; ev->pending &= ~mask; spin_unlock_irq(&ev->lock); + WARN_ON_ONCE(clearing & mask); return pending; } +/* + * Separate this part out so that a different pointer for clearing_ptr can be + * passed in for disk_clear_events. + */ static void disk_events_workfn(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct disk_events *ev = container_of(dwork, struct disk_events, dwork); + + disk_check_events(ev, &ev->clearing); +} + +static void disk_check_events(struct disk_events *ev, + unsigned int *clearing_ptr) +{ struct gendisk *disk = ev->disk; char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; - unsigned int clearing = ev->clearing; + unsigned int clearing = *clearing_ptr; unsigned int events; unsigned long intv; int nr_events = 0, i; @@ -1598,7 +1620,7 @@ static void disk_events_workfn(struct work_struct *work) events &= ~ev->pending; ev->pending |= events; - ev->clearing &= ~clearing; + *clearing_ptr &= ~clearing; intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f58a4a4b4dfb..2b8303ad63c9 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -168,7 +168,7 @@ static void wake_all_senders(struct drbd_tconn *tconn) { } /* must hold resource->req_lock */ -static void start_new_tl_epoch(struct drbd_tconn *tconn) +void start_new_tl_epoch(struct drbd_tconn *tconn) { /* no point closing an epoch, if it is empty, anyways. */ if (tconn->current_tle_writes == 0) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 016de6b8bb57..c08d22964d06 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -267,6 +267,7 @@ struct bio_and_error { int error; }; +extern void start_new_tl_epoch(struct drbd_tconn *tconn); extern void drbd_req_destroy(struct kref *kref); extern void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 53bf6182bac4..0fe220cfb9e9 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -931,6 +931,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, enum drbd_state_rv rv = SS_SUCCESS; enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; + bool did_remote, should_do_remote; os = drbd_read_state(mdev); @@ -981,11 +982,17 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) atomic_inc(&mdev->local_cnt); + did_remote = drbd_should_do_remote(mdev->state); mdev->state.i = ns.i; + should_do_remote = drbd_should_do_remote(mdev->state); mdev->tconn->susp = ns.susp; mdev->tconn->susp_nod = ns.susp_nod; mdev->tconn->susp_fen = ns.susp_fen; + /* put replicated vs not-replicated requests in seperate epochs */ + if (did_remote != should_do_remote) + start_new_tl_epoch(mdev->tconn); + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(mdev, "attached to UUIDs"); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9694dd99bbbc..3fd100990453 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -626,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data) } } - if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); - - mtip_restart_port(port); + if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + mtip_restart_port(port); + wake_up_interruptible(&port->svc_wait); + } clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); } if (port->ic_pause_timer) { @@ -3887,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd) * Delete our gendisk structure. This also removes the device * from /dev */ - del_gendisk(dd->disk); + if (dd->disk) { + if (dd->disk->queue) + del_gendisk(dd->disk); + else + put_disk(dd->disk); + } spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); @@ -3921,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd) "Shutting down %s ...\n", dd->disk->disk_name); /* Delete our gendisk structure, and cleanup the blk queue. */ - del_gendisk(dd->disk); + if (dd->disk) { + if (dd->disk->queue) + del_gendisk(dd->disk); + else + put_disk(dd->disk); + } + spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 74374fb762aa..5ac841ff6cc7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); -#define foreach_grant(pos, rbtree, node) \ - for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ +#define foreach_grant_safe(pos, n, rbtree, node) \ + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ + (n) = rb_next(&(pos)->node); \ &(pos)->node != NULL; \ - (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) + (pos) = container_of(n, typeof(*(pos)), node), \ + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) static void add_persistent_gnt(struct rb_root *root, @@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; + struct rb_node *n; int ret = 0; int segs_to_unmap = 0; - foreach_grant(persistent_gnt, root, node) { + foreach_grant_safe(persistent_gnt, n, root, node) { BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); gnttab_set_unmap_op(&unmap[segs_to_unmap], @@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) persistent_gnt->handle); pages[segs_to_unmap] = persistent_gnt->page; - rb_erase(&persistent_gnt->node, root); - kfree(persistent_gnt); - num--; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { @@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) BUG_ON(ret); segs_to_unmap = 0; } + + rb_erase(&persistent_gnt->node, root); + kfree(persistent_gnt); + num--; } BUG_ON(num != 0); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 96e9b00db081..11043c18ac5a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; struct grant *persistent_gnt; + struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry(persistent_gnt, all_gnts, node) { + llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); kfree(persistent_gnt); @@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, struct blkif_response *bret) { - int i; + int i = 0; struct bio_vec *bvec; struct req_iterator iter; unsigned long flags; @@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, */ rq_for_each_segment(bvec, s->request, iter) { BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); - i = offset >> PAGE_SHIFT; + if (bvec->bv_offset < offset) + i++; BUG_ON(i >= s->req.u.rw.nr_segments); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); @@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, bvec->bv_len); bvec_kunmap_irq(bvec_data, &flags); kunmap_atomic(shared_data); - offset += bvec->bv_len; + offset = bvec->bv_offset + bvec->bv_len; } } /* Add the persistent grant into the list of free grants */ diff --git a/include/linux/llist.h b/include/linux/llist.h index a5199f6d0e82..d0ab98f73d38 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -125,6 +125,31 @@ static inline void init_llist_head(struct llist_head *list) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** + * llist_for_each_entry_safe - iterate safely against remove over some entries + * of lock-less list of given type. + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as a temporary storage. + * @node: the fist entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. This variant allows removal of entries + * as we iterate. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member), \ + (n) = (pos)->member.next; \ + &(pos)->member != NULL; \ + (pos) = llist_entry(n, typeof(*(pos)), member), \ + (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) + +/** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * |