diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-01 10:39:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-01 10:39:57 -0700 |
commit | 694752922b12bd318aa80191bd9d8c3dcfb39055 (patch) | |
tree | 5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /lib | |
parent | a351e9b9fc24e982ec2f0e76379a49826036da12 (diff) | |
parent | 9438b3e080beccf6022138ea62192d55cc7dc4ed (diff) | |
download | lwn-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.gz lwn-694752922b12bd318aa80191bd9d8c3dcfb39055.zip |
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
- Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
was initially a fork of CFQ, but subsequently changed to implement
fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
to be used on desktop type single drives, providing good fairness.
From Paolo.
- Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
using a scalable token based algorithm that throttles IO based on
live completion IO stats, similarly to blk-wbt. From Omar.
- A series from Jan, moving users to separately allocated backing
devices. This continues the work of separating backing device life
times, solving various problems with hot removal.
- A series of updates for lightnvm, mostly from Javier. Includes a
'pblk' target that exposes an open channel SSD as a physical block
device.
- A series of fixes and improvements for nbd from Josef.
- A series from Omar, removing queue sharing between devices on mostly
legacy drivers. This helps us clean up other bits, if we know that a
queue only has a single device backing. This has been overdue for
more than a decade.
- Fixes for the blk-stats, and improvements to unify the stats and user
windows. This both improves blk-wbt, and enables other users to
register a need to receive IO stats for a device. From Omar.
- blk-throttle improvements from Shaohua. This provides a scalable
framework for implementing scalable prioritization - particularly for
blk-mq, but applicable to any type of block device. The interface is
marked experimental for now.
- Bucketized IO stats for IO polling from Stephen Bates. This improves
efficiency of polled workloads in the presence of mixed block size
IO.
- A few fixes for opal, from Scott.
- A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
From a variety of folks, mostly Sagi and James Smart.
- A series from Bart, improving our exposed info and capabilities from
the blk-mq debugfs support.
- A series from Christoph, cleaning up how we handle WRITE_ZEROES.
- A series from Christoph, cleaning up the block layer handling of how
we track errors in a request. On top of being a nice cleanup, it also
shrinks the size of struct request a bit.
- Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
never used by platforms, and the latter has outlived its usefulness.
- Various little bug fixes and cleanups from a wide variety of folks.
* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
block: hide badblocks attribute by default
blk-mq: unify hctx delay_work and run_work
block: add kblock_mod_delayed_work_on()
blk-mq: unify hctx delayed_run_work and run_work
nbd: fix use after free on module unload
MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
blk-mq-sched: alloate reserved tags out of normal pool
mtip32xx: use runtime tag to initialize command header
scsi: Implement blk_mq_ops.show_rq()
blk-mq: Add blk_mq_ops.show_rq()
blk-mq: Show operation, cmd_flags and rq_flags names
blk-mq: Make blk_flags_show() callers append a newline character
blk-mq: Move the "state" debugfs attribute one level down
blk-mq: Unregister debugfs attributes earlier
blk-mq: Only unregister hctxs for which registration succeeded
blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
blk-mq: Let blk_mq_debugfs_register() look up the queue name
blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
ide-pm: always pass 0 error to __blk_end_request_all
..
Diffstat (limited to 'lib')
-rw-r--r-- | lib/kobject.c | 5 | ||||
-rw-r--r-- | lib/sbitmap.c | 75 |
2 files changed, 72 insertions, 8 deletions
diff --git a/lib/kobject.c b/lib/kobject.c index 445dcaeb0f56..763d70a18941 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -601,12 +601,15 @@ struct kobject *kobject_get(struct kobject *kobj) } EXPORT_SYMBOL(kobject_get); -static struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) +struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) { + if (!kobj) + return NULL; if (!kref_get_unless_zero(&kobj->kref)) kobj = NULL; return kobj; } +EXPORT_SYMBOL(kobject_get_unless_zero); /* * kobject_cleanup - free kobject resources. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 60e800e0b5a0..80aa8d5463fa 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -79,15 +79,15 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) } EXPORT_SYMBOL_GPL(sbitmap_resize); -static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, - bool wrap) +static int __sbitmap_get_word(unsigned long *word, unsigned long depth, + unsigned int hint, bool wrap) { unsigned int orig_hint = hint; int nr; while (1) { - nr = find_next_zero_bit(&word->word, word->depth, hint); - if (unlikely(nr >= word->depth)) { + nr = find_next_zero_bit(word, depth, hint); + if (unlikely(nr >= depth)) { /* * We started with an offset, and we didn't reset the * offset to 0 in a failure case, so start from 0 to @@ -100,11 +100,11 @@ static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, return -1; } - if (!test_and_set_bit(nr, &word->word)) + if (!test_and_set_bit(nr, word)) break; hint = nr + 1; - if (hint >= word->depth - 1) + if (hint >= depth - 1) hint = 0; } @@ -119,7 +119,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) index = SB_NR_TO_INDEX(sb, alloc_hint); for (i = 0; i < sb->map_nr; i++) { - nr = __sbitmap_get_word(&sb->map[index], + nr = __sbitmap_get_word(&sb->map[index].word, + sb->map[index].depth, SB_NR_TO_BIT(sb, alloc_hint), !round_robin); if (nr != -1) { @@ -141,6 +142,37 @@ int 
sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) } EXPORT_SYMBOL_GPL(sbitmap_get); +int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, + unsigned long shallow_depth) +{ + unsigned int i, index; + int nr = -1; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + for (i = 0; i < sb->map_nr; i++) { + nr = __sbitmap_get_word(&sb->map[index].word, + min(sb->map[index].depth, shallow_depth), + SB_NR_TO_BIT(sb, alloc_hint), true); + if (nr != -1) { + nr += index << sb->shift; + break; + } + + /* Jump to next index. */ + index++; + alloc_hint = index << sb->shift; + + if (index >= sb->map_nr) { + index = 0; + alloc_hint = 0; + } + } + + return nr; +} +EXPORT_SYMBOL_GPL(sbitmap_get_shallow); + bool sbitmap_any_bit_set(const struct sbitmap *sb) { unsigned int i; @@ -342,6 +374,35 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq) } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); +int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth) +{ + unsigned int hint, depth; + int nr; + + hint = this_cpu_read(*sbq->alloc_hint); + depth = READ_ONCE(sbq->sb.depth); + if (unlikely(hint >= depth)) { + hint = depth ? prandom_u32() % depth : 0; + this_cpu_write(*sbq->alloc_hint, hint); + } + nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); + + if (nr == -1) { + /* If the map is full, a hint won't do us much good. */ + this_cpu_write(*sbq->alloc_hint, 0); + } else if (nr == hint || unlikely(sbq->round_robin)) { + /* Only update the hint if we used it. */ + hint = nr + 1; + if (hint >= depth - 1) + hint = 0; + this_cpu_write(*sbq->alloc_hint, hint); + } + + return nr; +} +EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); + static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; |