diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 12:08:14 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 12:08:14 +0900 |
commit | 0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch) | |
tree | 177c4cb22ece78b18f64f548ae82b9a15edbb99c /kernel | |
parent | 2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff) | |
parent | e37459b8e2c7db6735e39e019e448b76e5e77647 (diff) | |
download | lwn-0910c0bdf7c291a41bc21e40a97389c9d4c1960d.tar.gz lwn-0910c0bdf7c291a41bc21e40a97389c9d4c1960d.zip |
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
"This is the pull request for the core changes in the block layer for
3.13. It contains:
- The new blk-mq request interface.
This is a new and more scalable queueing model that marries the
best part of the request based interface we currently have (which
is fully featured, but scales poorly) and the bio based "interface"
which the new drivers for high IOPS devices end up using because
it's much faster than the request based one.
The bio interface has no block layer support, since it taps into
the stack much earlier. This means that drivers end up having to
implement a lot of functionality on their own, like tagging,
timeout handling, requeue, etc. The blk-mq interface provides all
these. Some drivers even provide a switch to select bio or rq and
has code to handle both, since things like merging only works in
the rq model and hence is faster for some workloads. This is a
huge mess. Conversion of these drivers nets us a substantial code
reduction. Initial results on converting SCSI to this model even
shows an 8x improvement on single queue devices. So while the
model was intended to work on the newer multiqueue devices, it has
substantial improvements for "classic" hardware as well. This code
has gone through extensive testing and development, it's now ready
to go. A pull request is coming to convert virtio-blk to this
model will be will be coming as well, with more drivers scheduled
for 3.14 conversion.
- Two blktrace fixes from Jan and Chen Gang.
- A plug merge fix from Alireza Haghdoost.
- Conversion of __get_cpu_var() from Christoph Lameter.
- Fix for sector_div() with 64-bit divider from Geert Uytterhoeven.
- A fix for a race between request completion and the timeout
handling from Jeff Moyer. This is what caused the merge conflict
with blk-mq/core, in case you are looking at that.
- A dm stacking fix from Mike Snitzer.
- A code consolidation fix and duplicated code removal from Kent
Overstreet.
- A handful of block bug fixes from Mikulas Patocka, fixing a loop
crash and memory corruption on blk cg.
- Elevator switch bug fix from Tomoki Sekiyama.
A heads-up that I had to rebase this branch. Initially the immutable
bio_vecs had been queued up for inclusion, but a week later, it became
clear that it wasn't fully cooked yet. So the decision was made to
pull this out and postpone it until 3.14. It was a straight forward
rebase, just pruning out the immutable series and the later fixes of
problems with it. The rest of the patches applied directly and no
further changes were made"
* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
block: Do not call sector_div() with a 64-bit divisor
kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
block: Consolidate duplicated bio_trim() implementations
block: Use rw_copy_check_uvector()
block: Enable sysfs nomerge control for I/O requests in the plug list
block: properly stack underlying max_segment_size to DM device
elevator: acquire q->sysfs_lock in elevator_change()
elevator: Fix a race in elevator switching and md device initialization
block: Replace __get_cpu_var uses
bdi: test bdi_init failure
block: fix a probe argument to blk_register_region
loop: fix crash if blk_alloc_queue fails
blk-core: Fix memory corruption if blkcg_init_queue fails
block: fix race between request completion and timeout handling
blktrace: Send BLK_TN_PROCESS events to all running traces
blk-mq: don't disallow request merges for req->special being set
blk-mq: mq plug list breakage
blk-mq: fix for flush deadlock
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/smp.c | 7 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 36 |
2 files changed, 34 insertions, 9 deletions
diff --git a/kernel/smp.c b/kernel/smp.c index f5768b0c816a..46116100f0ee 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -18,6 +18,7 @@ #ifdef CONFIG_USE_GENERIC_SMP_HELPERS enum { CSD_FLAG_LOCK = 0x01, + CSD_FLAG_WAIT = 0x02, }; struct call_function_data { @@ -124,7 +125,7 @@ static void csd_lock(struct call_single_data *csd) static void csd_unlock(struct call_single_data *csd) { - WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); + WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: @@ -146,6 +147,9 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait) unsigned long flags; int ipi; + if (wait) + csd->flags |= CSD_FLAG_WAIT; + raw_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); list_add_tail(&csd->list, &dst->list); @@ -340,6 +344,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *csd, } put_cpu(); } +EXPORT_SYMBOL_GPL(__smp_call_function_single); /** * smp_call_function_many(): Run a function on a set of other CPUs. diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b8b8560bfb95..f785aef65799 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -26,6 +26,7 @@ #include <linux/export.h> #include <linux/time.h> #include <linux/uaccess.h> +#include <linux/list.h> #include <trace/events/block.h> @@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; static bool blk_tracer_enabled __read_mostly; +static LIST_HEAD(running_trace_list); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock); + /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -107,10 +111,18 @@ record_it: * Send out a notify for this process, if we haven't done so since a trace * started */ -static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +static void trace_note_tsk(struct task_struct *tsk) { + unsigned long flags; + struct blk_trace *bt; + tsk->btrace_seq = blktrace_seq; - trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); + spin_lock_irqsave(&running_trace_lock, flags); + list_for_each_entry(bt, &running_trace_list, running_list) { + trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, + sizeof(tsk->comm)); + } + spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) @@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, goto record_it; } + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(tsk); + /* * A word about the locking here - we disable interrupts to reserve * some space in the relay per-cpu buffer, to prevent an irq * from coming in and stepping on our toes. */ local_irq_save(flags); - - if (unlikely(tsk->btrace_seq != blktrace_seq)) - trace_note_tsk(bt, tsk); - t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); if (t) { sequence = per_cpu_ptr(bt->sequence, cpu); @@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->dir = dir; bt->dev = dev; atomic_set(&bt->dropped, 0); + INIT_LIST_HEAD(&bt->running_list); ret = -EIO; bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, @@ -567,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, .end_lba = cbuts.end_lba, .pid = cbuts.pid, }; - memcpy(&buts.name, &cbuts.name, 32); ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; - if (copy_to_user(arg, &buts.name, 32)) { + if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { blk_trace_remove(q); return -EFAULT; } @@ -601,6 +612,9 @@ int blk_trace_startstop(struct request_queue *q, int start) blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; + spin_lock_irq(&running_trace_lock); + list_add(&bt->running_list, &running_trace_list); + spin_unlock_irq(&running_trace_lock); trace_note_time(bt); ret = 0; @@ -608,6 +622,9 @@ int blk_trace_startstop(struct request_queue *q, int start) } else { if (bt->trace_state == Blktrace_running) { bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); ret = 0; } @@ -1472,6 +1489,9 @@ static int blk_trace_remove_queue(struct request_queue *q) if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); + spin_lock_irq(&running_trace_lock); + list_del(&bt->running_list); + spin_unlock_irq(&running_trace_lock); blk_trace_free(bt); return 0; } |