From 4b758df21ee7081ab41448d21d60367efaa625b3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 14:25:51 +0800 Subject: bcache: Fix leak of bdev reference If blkdev_get_by_path() in register_bcache() fails, we try to lookup the block device using lookup_bdev() to detect which situation we are in to properly report error. However we never drop the reference returned to us from lookup_bdev(). Fix that. Signed-off-by: Jan Kara Acked-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 974d832e54a6..c3fedd265d18 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } -- cgit v1.2.3 From c81ffa32a214c84b08900fbc9d432187bd948eba Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:52 +0800 Subject: bcache: fix sequential large write IO bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sequential write IOs were tested with bs=1M by FIO in writeback cache mode, these IOs were expected to be bypassed, but actually they did not. We debug the code, and find in check_should_bypass(): if (!congested && mode == CACHE_MODE_WRITEBACK && op_is_write(bio_op(bio)) && (bio->bi_opf & REQ_SYNC)) goto rescale that means, If in writeback mode, a write IO with REQ_SYNC flag will not be bypassed though it is a sequential large IO, It's not a correct thing to do actually, so this patch remove these codes. Signed-off-by: tang.junhui Reviewed-by: Kent Overstreet Reviewed-by: Eric Wheeler Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 0e1463d0c334..de97d86ddff4 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && !dc->sequential_cutoff) goto rescale; - if (!congested && - mode == CACHE_MODE_WRITEBACK && - op_is_write(bio->bi_opf) && - op_is_sync(bio->bi_opf)) - goto rescale; - spin_lock(&dc->io_lock); hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash) -- cgit v1.2.3 From 69daf03adef5f7bc13e0ac86b4b8007df1767aab Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:53 +0800 Subject: bcache: do not subtract sectors_to_gc for bypassed IO Since bypassed IOs use no bucket, so do not subtract sectors_to_gc to trigger gc thread. Signed-off-by: tang.junhui Acked-by: Coly Li Reviewed-by: Eric Wheeler Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index de97d86ddff4..681b4f12b05a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. -- cgit v1.2.3 From 09b3efec81def807fb225359e34a8e72866dd9c4 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 6 Sep 2017 14:25:54 +0800 Subject: bcache: Don't reinvent the wheel but use existing llist API Although llist provides proper APIs, they are not used. Make them used. Signed-off-by: Byungchul Park Acked-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 864e673aec39..7d5286b05036 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -70,21 +70,10 @@ void __closure_wake_up(struct closure_waitlist *wait_list) list = llist_del_all(&wait_list->list); /* We first reverse the list to preserve FIFO ordering and fairness */ - - while (list) { - struct llist_node *t = list; - list = llist_next(list); - - t->next = reverse; - reverse = t; - } + reverse = llist_reverse_order(list); /* Then do the wakeups */ - - while (reverse) { - cl = container_of(reverse, struct closure, list); - reverse = llist_next(reverse); - + llist_for_each_entry(cl, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } -- cgit v1.2.3 From 0b43f49dc4d6d3789e936731dc16af94cb57d568 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:55 +0800 Subject: bcache: gc does not work when triggering by manual command I try to execute the following command to trigger gc thread: [root@localhost internal]# echo 1 > trigger_gc But it does not work, I debug the code in gc_should_run(), It works only if in invalidating or sectors_to_gc < 0. So set sectors_to_gc to -1 to meet the condition when we trigger gc by manual command. (Code comments aded by Coly Li) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f90f13616980..09295c07ea88 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -615,8 +615,21 @@ STORE(__bch_cache_set) bch_cache_accounting_clear(&c->accounting); } - if (attr == &sysfs_trigger_gc) + if (attr == &sysfs_trigger_gc) { + /* + * Garbage collection thread only works when sectors_to_gc < 0, + * when users write to sysfs entry trigger_gc, most of time + * they want to forcibly triger gargage collection. Here -1 is + * set to c->sectors_to_gc, to make gc_should_run() give a + * chance to permit gc thread to run. "give a chance" means + * before going into gc_should_run(), there is still chance + * that c->sectors_to_gc being set to other positive value. So + * writing sysfs entry trigger_gc won't always make sure gc + * thread takes effect. + */ + atomic_set(&c->sectors_to_gc, -1); wake_up_gc(c); + } if (attr == &sysfs_prune_cache) { struct shrink_control sc; -- cgit v1.2.3 From a8394090a9129b40f9d90dcb7f4a49d60c727ca6 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:56 +0800 Subject: bcache: correct cache_dirty_target in __update_writeback_rate() __update_write_rate() uses a Proportion-Differentiation Controller algorithm to control writeback rate. A dirty target number is used in this PD controller to control writeback rate. A larger target number will make the writeback rate smaller, on the versus, a smaller target number will make the writeback rate larger. bcache uses the following steps to calculate the target number, 1) cache_sectors = all-buckets-of-cache-set * buckets-size 2) cache_dirty_target = cache_sectors * cached-device-writeback_percent 3) target = cache_dirty_target * (sectors-of-cached-device/sectors-of-all-cached-devices-of-this-cache-set) The calculation at step 1) for cache_sectors is incorrect, which does not consider dirty blocks occupied by flash only volume. A flash only volume can be took as a bcache device without cached device. All data sectors allocated for it are persistent on cache device and marked dirty, they are not touched by bcache writeback and garbage collection code. So data blocks of flash only volume should be ignore when calculating cache_sectors of cache set. Current code does not subtract dirty sectors of flash only volume, which results a larger target number from the above 3 steps. And in sequence the cache device's writeback rate is smaller then a correct value, writeback speed is slower on all cached devices. This patch fixes the incorrect slower writeback rate by subtracting dirty sectors of flash only volumes in __update_writeback_rate(). (Commit log composed by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 3 ++- drivers/md/bcache/writeback.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c49022a8dc9d..5abe6472b66b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 629bd1a502fd..8789b9c8c484 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { -- cgit v1.2.3 From 77fa100f27475d08a569b9d51c17722130f089e7 Mon Sep 17 00:00:00 2001 From: Tony Asleson Date: Wed, 6 Sep 2017 14:25:57 +0800 Subject: bcache: Correct return value for sysfs attach errors If you encounter any errors in bch_cached_dev_attach it will return a negative error code. The variable 'v' which stores the result is unsigned, thus user space sees a very large value returned for bytes written which can cause incorrect user space behavior. Utilize 1 signed variable to use throughout the function to preserve error return capability. Signed-off-by: Tony Asleson Acked-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 09295c07ea88..104c57cd666c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -192,7 +192,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -227,7 +227,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; -- cgit v1.2.3 From 89b1fc54c257104df007c5888e3705e52b973d45 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:58 +0800 Subject: bcache: increase the number of open buckets In currently, we only alloc 6 open buckets for each cache set, but in usually, we always attach about 10 or so backend devices for each cache set, and the each bcache device are always accessed by about 10 or so threads in top application layer. So 6 open buckets are too few, It has led to that each of the same thread write data to different buckets, which would cause low efficiency write-back, and also cause buckets inefficient, and would be Very easy to run out of. I add debug message in bch_open_buckets_alloc() to print alloc bucket info, and test with ten bcache devices with a cache set, and each bcache device is accessed by ten threads. From the debug message, we can see that, after the modification, One bucket is more likely to assign to the same thread, and the data from the same thread are more likely to write the same bucket. Usually the same thread always write/read the same backend device, so it is good for write-back and also promote the usage efficiency of buckets. Signed-off-by: Tang Junhui Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ca4abe1ccd8d..cacbe2dbd5c3 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -68,6 +68,8 @@ #include #include +#define MAX_OPEN_BUCKETS 128 + /* Bucket heap / gen */ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) @@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c) spin_lock_init(&c->data_bucket_lock); - for (i = 0; i < 6; i++) { + for (i = 0; i < MAX_OPEN_BUCKETS; i++) { struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; -- cgit v1.2.3 From 9baf30972b5568d8b5bc8b3c46a6ec5b58100463 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Wed, 6 Sep 2017 14:25:59 +0800 Subject: bcache: fix for gc and write-back race gc and write-back get raced (see the email "bcache get stucked" I sended before): gc thread write-back thread | |bch_writeback_thread() |bch_gc_thread() | | |==>read_dirty() |==>bch_btree_gc() | |==>btree_root() //get btree root | | //node write locker | |==>bch_btree_gc_root() | | |==>read_dirty_submit() | |==>write_dirty() | |==>continue_at(cl, | | write_dirty_finish, | | system_wq); | |==>write_dirty_finish()//excute | | //in system_wq | |==>bch_btree_insert() | |==>bch_btree_map_leaf_nodes() | |==>__bch_btree_map_nodes() | |==>btree_root //try to get btree | | //root node read | | //lock | |-----stuck here |==>bch_btree_set_root() |==>bch_journal_meta() |==>bch_journal() |==>journal_try_write() |==>journal_write_unlocked() //journal_full(&c->journal) | //condition satisfied |==>continue_at(cl, journal_write, system_wq); //try to excute | //journal_write in system_wq | //but work queue is excuting | //write_dirty_finish() |==>closure_sync(); //wait journal_write execute | //over and wake up gc, |-------------stuck here |==>release root node write locker This patch alloc a separate work-queue for write-back thread to avoid such race. (Commit log re-organized by Coly Li to pass checkpatch.pl checking) Signed-off-by: Tang Junhui Acked-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 2 ++ drivers/md/bcache/writeback.c | 9 +++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dee542fff68e..2ed9bd231d84 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c3fedd265d18..3b724fa2b68d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 5abe6472b66b..34131439e28c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -187,7 +187,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -207,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -516,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) -- cgit v1.2.3 From da22f0eea555baf9b0a84b52afe56db2052cfe8d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Sep 2017 14:26:00 +0800 Subject: bcache: silence static checker warning In olden times, closure_return() used to have a hidden return built in. We removed the hidden return but forgot to add a new return here. If "c" were NULL we would oops on the next line, but fortunately "c" is never NULL. Let's just remove the if statement. Signed-off-by: Dan Carpenter Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3b724fa2b68d..1ae63374366c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1376,9 +1376,6 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; - if (!c) - closure_return(cl); - bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); -- cgit v1.2.3 From 7b6a8570e02a20d0b6cadb9689e4b2e6217c390c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Sep 2017 14:26:01 +0800 Subject: bcache: Update continue_at() documentation continue_at() doesn't have a return statement anymore. Signed-off-by: Dan Carpenter Acked-by: Coly Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 1ec84ca81146..295b7e43f92c 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) * been dropped with closure_put()), it will resume execution at @fn running out * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). * - * NOTE: This macro expands to a return in the calling function! - * * This is because after calling continue_at() you no longer have a ref on @cl, * and whatever @cl owns may be freed out from under you - a running closure fn * has a ref on its own closure which continue_at() drops. @@ -340,8 +338,6 @@ do { \ * Causes @fn to be executed out of @cl, in @wq context (or called directly if * @wq is NULL). * - * NOTE: like continue_at(), this macro expands to a return in the caller! - * * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, * thus it's not safe to touch anything protected by @cl after a * continue_at_nobarrier(). -- cgit v1.2.3 From 9276717b9e297a62d1151a43d1cd286213f68eb7 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Wed, 6 Sep 2017 14:26:02 +0800 Subject: bcache: fix bch_hprint crash and improve output Most importantly, solve a crash where %llu was used to format signed numbers. This would cause a buffer overflow when reading sysfs writeback_rate_debug, as only 20 bytes were allocated for this and %llu writes 20 characters plus a null. Always use the units mechanism rather than having different output paths for simplicity. Also, correct problems with display output where 1.10 was a larger number than 1.09, by multiplying by 10 and then dividing by 1024 instead of dividing by 100. (Remainders of >= 1000 would print as .10). Minor changes: Always display the decimal point instead of trying to omit it based on number of digits shown. Decide what units to use based on 1000 as a threshold, not 1024 (in other words, always print at most 3 digits before the decimal point). Signed-off-by: Michael Lyle Reported-by: Dmitry Yu Okunev Acked-by: Kent Overstreet Reviewed-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/util.c | 50 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 8c3a938f4bf0..176d3c2ef5f5 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], -- cgit v1.2.3 From 175206cf9ab63161dec74d9cd7f9992e062491f5 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Thu, 7 Sep 2017 01:28:53 +0800 Subject: bcache: initialize dirty stripes in flash_dev_run() bcache uses a Proportion-Differentiation Controller algorithm to control writeback rate to cached devices. In the PD controller algorithm, dirty stripes of thin flash device should not be counted in, because flash only volumes never write back dirty data. Currently dirty stripe counter for thin flash device is not initialized when the thin flash device starts. Which means the following calculation in PD controller will reference an undefined dirty stripes number, and all cached devices attached to the same cache set where the thin flash device lies on may have an inaccurate writeback rate. This patch calles bch_sectors_dirty_init() in flash_dev_run(), to correctly initialize dirty stripe counter when the thin flash device starts to run. This patch also does following parameter data type change, -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); to call this function conveniently in flash_dev_run(). (Commit log is composed by Coly Li) Signed-off-by: Tang Junhui Reviewed-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 ++- drivers/md/bcache/writeback.c | 8 ++++---- drivers/md/bcache/writeback.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1ae63374366c..fc0a31b13ac4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1230,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 34131439e28c..e663ca082183 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -482,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 8789b9c8c484..e35421d20d2e 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -103,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); -- cgit v1.2.3