summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2011-06-03 21:18:13 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2011-06-30 09:23:41 +0200
commit829c60878626be290a4c248e8f1b86a0d5cbd38b (patch)
tree3774c1a3abb72bcc5ab0b37a23fdd9727ca188e4 /drivers
parent0cfdd247d1779d5ffc8f685b172a526ecdc6773f (diff)
downloadlwn-829c60878626be290a4c248e8f1b86a0d5cbd38b.tar.gz
lwn-829c60878626be290a4c248e8f1b86a0d5cbd38b.zip
drbd: add missing spinlock to bitmap receive
During bitmap exchange, when using the RLE bitmap compression scheme, we have a code path that can set the whole bitmap at once. To avoid holding spin_lock_irq() for too long, we used to lock out other bitmap modifications during bitmap exchange by other means, and then, knowing we have exclusive access to the bitmap, modify it without the spinlock, and with IRQs enabled. Since we now allow local IO to continue, potentially setting additional bits during the bitmap receive phase, this is no longer true, and we get uncoordinated updates of bitmap members, causing bm_set to no longer accurately reflect the total number of set bits. To actually see this, you'd need to have a large bitmap, use RLE bitmap compression, and have busy IO during sync handshake and bitmap exchange. Fix this by taking the spin_lock_irq() in this code path as well, but calling cond_resched_lock() after each page worth of bits processed. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c34
1 files changed, 19 insertions, 15 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index f440a02dfdb1..1e89a74ddb17 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -112,9 +112,6 @@ struct drbd_bitmap {
struct task_struct *bm_task;
};
-static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
- unsigned long e, int val, const enum km_type km);
-
#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
{
@@ -1256,7 +1253,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
* expected to be called for only a few bits (e - s about BITS_PER_LONG).
* Must hold bitmap lock already. */
static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
- unsigned long e, int val, const enum km_type km)
+ unsigned long e, int val)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long *p_addr = NULL;
@@ -1274,14 +1271,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
if (page_nr != last_page_nr) {
if (p_addr)
- __bm_unmap(p_addr, km);
+ __bm_unmap(p_addr, KM_IRQ1);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
changed_total += c;
c = 0;
- p_addr = __bm_map_pidx(b, page_nr, km);
+ p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
last_page_nr = page_nr;
}
if (val)
@@ -1290,7 +1287,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
}
if (p_addr)
- __bm_unmap(p_addr, km);
+ __bm_unmap(p_addr, KM_IRQ1);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
@@ -1318,7 +1315,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
bm_print_lock_info(mdev);
- c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
+ c = __bm_change_bits_to(mdev, s, e, val);
spin_unlock_irqrestore(&b->bm_lock, flags);
return c;
@@ -1343,16 +1340,17 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{
int i;
int bits;
- unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0);
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
b->bm_set += BITS_PER_LONG - bits;
}
- kunmap_atomic(paddr, KM_USER0);
+ kunmap_atomic(paddr, KM_IRQ1);
}
-/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave.
+/* Same thing as drbd_bm_set_bits,
+ * but more efficient for a large bit range.
* You must first drbd_bm_lock().
* Can be called to set the whole bitmap in one go.
* Sets bits from s to e _inclusive_. */
@@ -1366,6 +1364,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* Do not use memset, because we must account for changes,
* so we need to loop over the words with hweight() anyways.
*/
+ struct drbd_bitmap *b = mdev->bitmap;
unsigned long sl = ALIGN(s,BITS_PER_LONG);
unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
int first_page;
@@ -1376,15 +1375,19 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
if (e - s <= 3*BITS_PER_LONG) {
/* don't bother; el and sl may even be wrong. */
- __bm_change_bits_to(mdev, s, e, 1, KM_USER0);
+ spin_lock_irq(&b->bm_lock);
+ __bm_change_bits_to(mdev, s, e, 1);
+ spin_unlock_irq(&b->bm_lock);
return;
}
/* difference is large enough that we can trust sl and el */
+ spin_lock_irq(&b->bm_lock);
+
/* bits filling the current long */
if (sl)
- __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0);
+ __bm_change_bits_to(mdev, s, sl-1, 1);
first_page = sl >> (3 + PAGE_SHIFT);
last_page = el >> (3 + PAGE_SHIFT);
@@ -1397,7 +1400,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* first and full pages, unless first page == last page */
for (page_nr = first_page; page_nr < last_page; page_nr++) {
bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
- cond_resched();
+ cond_resched_lock(&b->bm_lock);
first_word = 0;
}
@@ -1411,7 +1414,8 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* it would trigger an assert in __bm_change_bits_to()
*/
if (el <= e)
- __bm_change_bits_to(mdev, el, e, 1, KM_USER0);
+ __bm_change_bits_to(mdev, el, e, 1);
+ spin_unlock_irq(&b->bm_lock);
}
/* returns bit state