diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-06-03 21:18:13 +0200 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-06-30 09:23:41 +0200 |
commit | 829c60878626be290a4c248e8f1b86a0d5cbd38b (patch) | |
tree | 3774c1a3abb72bcc5ab0b37a23fdd9727ca188e4 /drivers/block | |
parent | 0cfdd247d1779d5ffc8f685b172a526ecdc6773f (diff) | |
download | lwn-829c60878626be290a4c248e8f1b86a0d5cbd38b.tar.gz lwn-829c60878626be290a4c248e8f1b86a0d5cbd38b.zip |
drbd: add missing spinlock to bitmap receive
During bitmap exchange, when using the RLE bitmap compression scheme,
we have a code path that can set the whole bitmap at once.
To avoid holding spin_lock_irq() for too long, we used to lock out other
bitmap modifications during bitmap exchange by other means, and then,
knowing we have exclusive access to the bitmap, modify it without
the spinlock, and with IRQs enabled.
Since we now allow local IO to continue, potentially setting additional
bits during the bitmap receive phase, this is no longer true, and we get
uncoordinated updates of bitmap members, causing bm_set to no longer
accurately reflect the total number of set bits.
To actually see this, you'd need to have a large bitmap, use RLE bitmap
compression, and have busy IO during sync handshake and bitmap exchange.
Fix this by taking the spin_lock_irq() in this code path as well, but
calling cond_resched_lock() after each page worth of bits processed.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 34 |
1 files changed, 19 insertions, 15 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index f440a02dfdb1..1e89a74ddb17 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -112,9 +112,6 @@ struct drbd_bitmap { struct task_struct *bm_task; }; -static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, - unsigned long e, int val, const enum km_type km); - #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) { @@ -1256,7 +1253,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f * expected to be called for only a few bits (e - s about BITS_PER_LONG). * Must hold bitmap lock already. */ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, - unsigned long e, int val, const enum km_type km) + unsigned long e, int val) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr = NULL; @@ -1274,14 +1271,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); if (page_nr != last_page_nr) { if (p_addr) - __bm_unmap(p_addr, km); + __bm_unmap(p_addr, KM_IRQ1); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) bm_set_page_need_writeout(b->bm_pages[last_page_nr]); changed_total += c; c = 0; - p_addr = __bm_map_pidx(b, page_nr, km); + p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1); last_page_nr = page_nr; } if (val) @@ -1290,7 +1287,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); } if (p_addr) - __bm_unmap(p_addr, km); + __bm_unmap(p_addr, KM_IRQ1); if (c < 0) bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); else if (c > 0) @@ -1318,7 +1315,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) bm_print_lock_info(mdev); - c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); + c = __bm_change_bits_to(mdev, s, e, val); spin_unlock_irqrestore(&b->bm_lock, flags); return c; @@ -1343,16 +1340,17 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, { int i; int bits; - unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; b->bm_set += BITS_PER_LONG - bits; } - kunmap_atomic(paddr, KM_USER0); + kunmap_atomic(paddr, KM_IRQ1); } -/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. +/* Same thing as drbd_bm_set_bits, + * but more efficient for a large bit range. * You must first drbd_bm_lock(). * Can be called to set the whole bitmap in one go. * Sets bits from s to e _inclusive_. */ @@ -1366,6 +1364,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * Do not use memset, because we must account for changes, * so we need to loop over the words with hweight() anyways. */ + struct drbd_bitmap *b = mdev->bitmap; unsigned long sl = ALIGN(s,BITS_PER_LONG); unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); int first_page; @@ -1376,15 +1375,19 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi if (e - s <= 3*BITS_PER_LONG) { /* don't bother; el and sl may even be wrong. */ - __bm_change_bits_to(mdev, s, e, 1, KM_USER0); + spin_lock_irq(&b->bm_lock); + __bm_change_bits_to(mdev, s, e, 1); + spin_unlock_irq(&b->bm_lock); return; } /* difference is large enough that we can trust sl and el */ + spin_lock_irq(&b->bm_lock); + /* bits filling the current long */ if (sl) - __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); + __bm_change_bits_to(mdev, s, sl-1, 1); first_page = sl >> (3 + PAGE_SHIFT); last_page = el >> (3 + PAGE_SHIFT); @@ -1397,7 +1400,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi /* first and full pages, unless first page == last page */ for (page_nr = first_page; page_nr < last_page; page_nr++) { bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); - cond_resched(); + cond_resched_lock(&b->bm_lock); first_word = 0; } @@ -1411,7 +1414,8 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * it would trigger an assert in __bm_change_bits_to() */ if (el <= e) - __bm_change_bits_to(mdev, el, e, 1, KM_USER0); + __bm_change_bits_to(mdev, el, e, 1); + spin_unlock_irq(&b->bm_lock); } /* returns bit state |