summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-07-25 10:33:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-25 10:33:36 -0700
commit096a705bbc080a4041636d07514560da8d78acbe (patch)
tree38c3c01225709ffa53419083ea6332f8a72610de
parentfea80311a939a746533a6d7e7c3183729d6a3faf (diff)
parent5757a6d76cdf6dda2a492c09b985c015e86779b1 (diff)
downloadlwn-096a705bbc080a4041636d07514560da8d78acbe.tar.gz
lwn-096a705bbc080a4041636d07514560da8d78acbe.zip
Merge branch 'for-3.1/core' of git://git.kernel.dk/linux-block
* 'for-3.1/core' of git://git.kernel.dk/linux-block: (24 commits) block: strict rq_affinity backing-dev: use synchronize_rcu_expedited instead of synchronize_rcu block: fix patch import error in max_discard_sectors check block: reorder request_queue to remove 64 bit alignment padding CFQ: add think time check for group CFQ: add think time check for service tree CFQ: move think time check variables to a separate struct fixlet: Remove fs_excl from struct task. cfq: Remove special treatment for metadata rqs. block: document blk_plug list access block: avoid building too big plug list compat_ioctl: fix make headers_check regression block: eliminate potential for infinite loop in blkdev_issue_discard compat_ioctl: fix warning caused by qemu block: flush MEDIA_CHANGE from drivers on close(2) blk-throttle: Make total_nr_queued unsigned block: Add __attribute__((format(printf...) and fix fallout fs/partitions/check.c: make local symbols static block:remove some spare spaces in genhd.c block:fix the comment error in blkdev.h ...
-rw-r--r--Documentation/block/queue-sysfs.txt10
-rw-r--r--block/blk-core.c11
-rw-r--r--block/blk-ioc.c40
-rw-r--r--block/blk-lib.c5
-rw-r--r--block/blk-softirq.c11
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--block/blk-throttle.c8
-rw-r--r--block/cfq-iosched.c152
-rw-r--r--block/compat_ioctl.c14
-rw-r--r--block/deadline-iosched.c4
-rw-r--r--block/elevator.c7
-rw-r--r--block/genhd.c28
-rw-r--r--fs/block_dev.c23
-rw-r--r--fs/compat_ioctl.c5
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/reiserfs/journal.c13
-rw-r--r--fs/super.c4
-rw-r--r--include/linux/blkdev.h27
-rw-r--r--include/linux/elevator.h2
-rw-r--r--include/linux/fd.h22
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/genhd.h2
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/iocontext.h14
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--mm/backing-dev.c2
28 files changed, 229 insertions, 208 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f65274081c8d..d8147b336c35 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
rq_affinity (RW)
----------------
-If this option is enabled, the block layer will migrate request completions
-to the CPU that originally submitted the request. For some workloads
-this provides a significant reduction in CPU cycles due to caching effects.
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
scheduler (RW)
--------------
diff --git a/block/blk-core.c b/block/blk-core.c
index 1d49e1c7c905..f8cb09951830 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,8 @@ get_rq:
init_request_from_bio(req, bio);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- bio_flagged(bio, BIO_CPU_AFFINE)) {
- req->cpu = blk_cpu_to_group(get_cpu());
- put_cpu();
- }
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = smp_processor_id();
plug = current->plug;
if (plug) {
@@ -1305,7 +1303,10 @@ get_rq:
plug->should_sort = 1;
}
list_add_tail(&req->queuelist, &plug->list);
+ plug->count++;
drive_stat_acct(req, 1);
+ if (plug->count >= BLK_MAX_REQUEST_COUNT)
+ blk_flush_plug_list(plug, false);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
@@ -2629,6 +2630,7 @@ void blk_start_plug(struct blk_plug *plug)
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->cb_list);
plug->should_sort = 0;
+ plug->count = 0;
/*
* If this is a nested plug, don't actually assign it. It will be
@@ -2712,6 +2714,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
return;
list_splice_init(&plug->list, &list);
+ plug->count = 0;
if (plug->should_sort) {
list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3c..6f9bbd978653 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
- struct io_context *ret;
+ struct io_context *ioc;
- ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_long_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = 0; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
+ ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
+ if (ioc) {
+ atomic_long_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = 0; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
- ret->cgroup_changed = 0;
+ ioc->cgroup_changed = 0;
#endif
}
- return ret;
+ return ioc;
}
/*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
*/
struct io_context *get_io_context(gfp_t gfp_flags, int node)
{
- struct io_context *ret = NULL;
+ struct io_context *ioc = NULL;
/*
* Check for unlikely race with exiting task. ioc ref count is
* zero when ioc is being detached.
*/
do {
- ret = current_io_context(gfp_flags, node);
- if (unlikely(!ret))
+ ioc = current_io_context(gfp_flags, node);
+ if (unlikely(!ioc))
break;
- } while (!atomic_long_inc_not_zero(&ret->refcount));
+ } while (!atomic_long_inc_not_zero(&ioc->refcount));
- return ret;
+ return ioc;
}
EXPORT_SYMBOL(get_io_context);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581d..2b461b496a78 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* granularity
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- if (q->limits.discard_granularity) {
+ if (unlikely(!max_discard_sectors)) {
+ /* Avoid infinite loop below. Being cautious never hurts. */
+ return -EOPNOTSUPP;
+ } else if (q->limits.discard_granularity) {
unsigned int disc_sects = q->limits.discard_granularity >> 9;
max_discard_sectors &= ~(disc_sects - 1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c21602228..475fab809a80 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
void __blk_complete_request(struct request *req)
{
+ int ccpu, cpu, group_cpu = NR_CPUS;
struct request_queue *q = req->q;
unsigned long flags;
- int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
- group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
ccpu = req->cpu;
- else
+ if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
+ ccpu = blk_cpu_to_group(ccpu);
+ group_cpu = blk_cpu_to_group(cpu);
+ }
+ } else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c87..0ee17b5e7fb6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+ bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
- return queue_var_show(set, page);
+ return queue_var_show(set << force, page);
}
static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
- if (val)
+ if (val) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- else
- queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ if (val == 2)
+ queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ } else {
+ queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ }
spin_unlock_irq(q->queue_lock);
#endif
return ret;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3689f833afdc..f6a794120505 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
return NULL;
}
-static inline int total_nr_queued(struct throtl_data *td)
+static inline unsigned int total_nr_queued(struct throtl_data *td)
{
- return (td->nr_queued[0] + td->nr_queued[1]);
+ return td->nr_queued[0] + td->nr_queued[1];
}
static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
bio_list_init(&bio_list_on_stack);
- throtl_log(td, "dispatch nr_queued=%d read=%u write=%u",
+ throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
total_nr_queued(td), td->nr_queued[READ],
td->nr_queued[WRITE]);
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
struct delayed_work *dwork = &td->throtl_work;
/* schedule work if limits changed even if no bio is queued */
- if (total_nr_queued(td) > 0 || td->limits_changed) {
+ if (total_nr_queued(td) || td->limits_changed) {
/*
* We might have a work scheduled to be executed in future.
* Cancel that and schedule a new one.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae21919f15e1..1f96ad6254f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
+ struct cfq_ttime ttime;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
- .count = 0, .min_vdisktime = 0, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
+ .ttime = {.last_end_request = jiffies,},}
/*
* Per process-grouping structure
@@ -129,14 +130,12 @@ struct cfq_queue {
unsigned long slice_end;
long slice_resid;
- /* pending metadata requests */
- int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
/* io prio of this group */
unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
+ unsigned short ioprio_class;
pid_t pid;
@@ -212,6 +211,7 @@ struct cfq_group {
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
+ struct cfq_ttime ttime;
};
/*
@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
j++, st = i < IDLE_WORKLOAD ? \
&cfqg->service_trees[i][j]: NULL) \
+static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
+ struct cfq_ttime *ttime, bool group_idle)
+{
+ unsigned long slice;
+ if (!sample_valid(ttime->ttime_samples))
+ return false;
+ if (group_idle)
+ slice = cfqd->cfq_group_idle;
+ else
+ slice = cfqd->cfq_slice_idle;
+ return ttime->ttime_mean > slice;
+}
static inline bool iops_mode(struct cfq_data *cfqd)
{
@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
if (rq_is_sync(rq1) != rq_is_sync(rq2))
return rq_is_sync(rq1) ? rq1 : rq2;
- if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
- return rq1->cmd_flags & REQ_META ? rq1 : rq2;
-
s1 = blk_rq_pos(rq1);
s2 = blk_rq_pos(rq2);
@@ -1005,8 +1014,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
return NULL;
}
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
- unsigned int weight)
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+ unsigned int weight)
{
struct cfq_group *cfqg = cfqg_of_blkg(blkg);
cfqg->new_weight = weight;
@@ -1059,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
+ cfqg->ttime.last_end_request = jiffies;
+
/*
* Take the initial reference that will be released on destroy
* This can be thought of a joint reference by cgroup and
@@ -1235,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
* it should not be NULL as even if elevator was exiting, cgroup deltion
* path got to it first.
*/
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
{
unsigned long flags;
struct cfq_data *cfqd = key;
@@ -1502,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
- struct request *__alias, *prev;
+ struct request *prev;
cfqq->queued[rq_is_sync(rq)]++;
- /*
- * looks a little odd, but the first insert might return an alias.
- * if that happens, put the alias on the dispatch list
- */
- while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
- cfq_dispatch_insert(cfqd->queue, __alias);
+ elv_rb_add(&cfqq->sort_list, rq);
if (!cfq_cfqq_on_rr(cfqq))
cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1598,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
cfqq->cfqd->rq_queued--;
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(rq), rq_is_sync(rq));
- if (rq->cmd_flags & REQ_META) {
- WARN_ON(!cfqq->meta_pending);
- cfqq->meta_pending--;
- }
}
static int cfq_merge(struct request_queue *q, struct request **req,
@@ -1969,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones
* in their service tree.
*/
- if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+ if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
+ !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
@@ -2022,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* slice, then don't idle. This avoids overrunning the allotted
* time slice.
*/
- if (sample_valid(cic->ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime_mean)) {
+ if (sample_valid(cic->ttime.ttime_samples) &&
+ (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
- cic->ttime_mean);
+ cic->ttime.ttime_mean);
return;
}
@@ -2381,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
* this group, wait for requests to complete.
*/
check_group_idle:
- if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
- && cfqq->cfqg->dispatched) {
+ if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+ cfqq->cfqg->dispatched &&
+ !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
cfqq = NULL;
goto keep_queue;
}
@@ -2833,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
cfqd->queue->node);
if (cic) {
- cic->last_end_request = jiffies;
+ cic->ttime.last_end_request = jiffies;
INIT_LIST_HEAD(&cic->queue_list);
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
@@ -2883,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
* elevate the priority of this queue
*/
cfqq->org_ioprio = cfqq->ioprio;
- cfqq->org_ioprio_class = cfqq->ioprio_class;
cfq_clear_cfqq_prio_changed(cfqq);
}
@@ -3221,14 +3224,28 @@ err:
}
static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
{
- unsigned long elapsed = jiffies - cic->last_end_request;
- unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+ unsigned long elapsed = jiffies - ttime->last_end_request;
+ elapsed = min(elapsed, 2UL * slice_idle);
- cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
- cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
- cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+ ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
+ ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
+ ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+}
+
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct cfq_io_context *cic)
+{
+ if (cfq_cfqq_sync(cfqq)) {
+ __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
+ __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
+ cfqd->cfq_slice_idle);
+ }
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+#endif
}
static void
@@ -3277,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
- else if (sample_valid(cic->ttime_samples)) {
- if (cic->ttime_mean > cfqd->cfq_slice_idle)
+ else if (sample_valid(cic->ttime.ttime_samples)) {
+ if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
enable_idle = 0;
else
enable_idle = 1;
@@ -3340,13 +3357,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
return true;
/*
- * So both queues are sync. Let the new request get disk time if
- * it's a metadata request and the current queue is doing regular IO.
- */
- if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
- return true;
-
- /*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3410,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
- if (rq->cmd_flags & REQ_META)
- cfqq->meta_pending++;
- cfq_update_io_thinktime(cfqd, cic);
+ cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);
cfq_update_idle_window(cfqd, cfqq, cic);
@@ -3520,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
if (cfqq->cfqg->nr_cfqq > 1)
return false;
+ /* the only queue in the group, but think time is big */
+ if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
+ return false;
+
if (cfq_slice_used(cfqq))
return true;
/* if slice left is less than think time, wait busy */
- if (cic && sample_valid(cic->ttime_samples)
- && (cfqq->slice_end - jiffies < cic->ttime_mean))
+ if (cic && sample_valid(cic->ttime.ttime_samples)
+ && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
return true;
/*
@@ -3566,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
if (sync) {
- RQ_CIC(rq)->last_end_request = now;
+ struct cfq_rb_root *service_tree;
+
+ RQ_CIC(rq)->ttime.last_end_request = now;
+
+ if (cfq_cfqq_on_rr(cfqq))
+ service_tree = cfqq->service_tree;
+ else
+ service_tree = service_tree_for(cfqq->cfqg,
+ cfqq_prio(cfqq), cfqq_type(cfqq));
+ service_tree->ttime.last_end_request = now;
if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
cfqd->last_delayed_sync = now;
}
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ cfqq->cfqg->ttime.last_end_request = now;
+#endif
+
/*
* If this is the active queue, check if it needs to be expired,
* or if we want to idle in case it has no pending requests.
@@ -3616,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_schedule_dispatch(cfqd);
}
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
- if (has_fs_excl()) {
- /*
- * boost idle prio on transactions that would lock out other
- * users of the filesystem
- */
- if (cfq_class_idle(cfqq))
- cfqq->ioprio_class = IOPRIO_CLASS_BE;
- if (cfqq->ioprio > IOPRIO_NORM)
- cfqq->ioprio = IOPRIO_NORM;
- } else {
- /*
- * unboost the queue (if needed)
- */
- cfqq->ioprio_class = cfqq->org_ioprio_class;
- cfqq->ioprio = cfqq->org_ioprio;
- }
-}
-
static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3670,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
cfq_init_prio_data(cfqq, cic->ioc);
- cfq_prio_boost(cfqq);
return __cfq_may_queue(cfqq);
}
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333a..7b725020823c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
#define BLKBSZSET_32 _IOW(0x12, 113, int)
#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
-struct compat_floppy_struct {
- compat_uint_t size;
- compat_uint_t sect;
- compat_uint_t head;
- compat_uint_t track;
- compat_uint_t stretch;
- unsigned char gap;
- unsigned char rate;
- unsigned char spec1;
- unsigned char fmt_gap;
- const compat_caddr_t name;
-};
-
struct compat_floppy_drive_params {
char cmos;
compat_ulong_t max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
-#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea1864..c644137d9cd6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = deadline_rb_root(dd, rq);
- struct request *__alias;
- while (unlikely(__alias = elv_rb_add(root, rq)))
- deadline_move_request(dd, __alias);
+ elv_rb_add(root, rq);
}
static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb6..a3b64bc71d88 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
* RB-tree support functions for inserting/lookup/removal of requests
* in a sorted RB tree.
*/
-struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+void elv_rb_add(struct rb_root *root, struct request *rq)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
if (blk_rq_pos(rq) < blk_rq_pos(__rq))
p = &(*p)->rb_left;
- else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
+ else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
p = &(*p)->rb_right;
- else
- return __rq;
}
rb_link_node(&rq->rb_node, parent, p);
rb_insert_color(&rq->rb_node, root);
- return NULL;
}
EXPORT_SYMBOL(elv_rb_add);
diff --git a/block/genhd.c b/block/genhd.c
index 6024b82e3209..5cb51c55f6d8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
- /* Register BDI before referencing it from bdev */
+ /* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
@@ -1140,7 +1140,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"wsect wuse running use aveq"
"\n\n");
*/
-
+
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
@@ -1164,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
);
}
disk_part_iter_exit(&piter);
-
+
return 0;
}
@@ -1492,30 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
}
/**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
*
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
*
* CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
*/
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
struct disk_events *ev = disk->ev;
- unsigned long flags;
if (!ev)
return;
- spin_lock_irqsave(&ev->lock, flags);
+ spin_lock_irq(&ev->lock);
+ ev->clearing |= mask;
if (!ev->block) {
cancel_delayed_work(&ev->dwork);
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
}
- spin_unlock_irqrestore(&ev->lock, flags);
+ spin_unlock_irq(&ev->lock);
}
-EXPORT_SYMBOL_GPL(disk_check_events);
/**
* disk_clear_events - synchronously check, clear and return pending events
@@ -1705,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
mutex_lock(&disk_events_mutex);
list_for_each_entry(ev, &disk_events, node)
- disk_check_events(ev->disk);
+ disk_flush_events(ev->disk, 0);
mutex_unlock(&disk_events_mutex);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9fb0b15331d3..c62fb84944d5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1448,6 +1448,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ mutex_lock(&bdev->bd_mutex);
+
if (mode & FMODE_EXCL) {
bool bdev_free;
@@ -1456,7 +1458,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
* are protected with bdev_lock. bd_mutex is to
* synchronize disk_holder unlinking.
*/
- mutex_lock(&bdev->bd_mutex);
spin_lock(&bdev_lock);
WARN_ON_ONCE(--bdev->bd_holders < 0);
@@ -1474,17 +1475,21 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
* If this was the last claim, remove holder link and
* unblock evpoll if it was a write holder.
*/
- if (bdev_free) {
- if (bdev->bd_write_holder) {
- disk_unblock_events(bdev->bd_disk);
- disk_check_events(bdev->bd_disk);
- bdev->bd_write_holder = false;
- }
+ if (bdev_free && bdev->bd_write_holder) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
}
-
- mutex_unlock(&bdev->bd_mutex);
}
+ /*
+ * Trigger event checking and tell drivers to flush MEDIA_CHANGE
+ * event. This is to ensure detection of media removal commanded
+ * from userland - e.g. eject(1).
+ */
+ disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
+
+ mutex_unlock(&bdev->bd_mutex);
+
return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 61abb638b4bf..8be086e9abe4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -68,6 +68,8 @@
#ifdef CONFIG_BLOCK
#include <linux/loop.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/sg.h>
@@ -944,6 +946,9 @@ COMPATIBLE_IOCTL(FIOQSIZE)
IGNORE_IOCTL(LOOP_CLR_FD)
/* md calls this on random blockdevs */
IGNORE_IOCTL(RAID_VERSION)
+/* qemu/qemu-img might call these two on plain files for probing */
+IGNORE_IOCTL(CDROM_DRIVE_STATUS)
+IGNORE_IOCTL(FDGETPRM32)
/* SG stuff */
COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..e3c63d1c5e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,22 +237,22 @@ ssize_t part_size_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
-ssize_t part_ro_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%d\n", p->policy ? 1 : 0);
}
-ssize_t part_alignment_offset_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_alignment_offset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}
-ssize_t part_discard_alignment_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_discard_alignment_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%u\n", p->discard_alignment);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c5e82ece7c6c..a159ba5a35e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -678,23 +678,19 @@ struct buffer_chunk {
static void write_chunk(struct buffer_chunk *chunk)
{
int i;
- get_fs_excl();
for (i = 0; i < chunk->nr; i++) {
submit_logged_buffer(chunk->bh[i]);
}
chunk->nr = 0;
- put_fs_excl();
}
static void write_ordered_chunk(struct buffer_chunk *chunk)
{
int i;
- get_fs_excl();
for (i = 0; i < chunk->nr; i++) {
submit_ordered_buffer(chunk->bh[i]);
}
chunk->nr = 0;
- put_fs_excl();
}
static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
return 0;
}
- get_fs_excl();
-
/* before we can put our commit blocks on disk, we have to make sure everyone older than
** us is on disk too
*/
@@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
if (retval)
reiserfs_abort(s, retval, "Journal write error in %s",
__func__);
- put_fs_excl();
return retval;
}
@@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
return 0;
}
- get_fs_excl();
-
/* if all the work is already done, get out of here */
if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
put_journal_list(s, jl);
if (flushall)
mutex_unlock(&journal->j_flush_mutex);
- put_fs_excl();
return err;
}
@@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
th->t_trans_id = journal->j_trans_id;
unlock_journal(sb);
INIT_LIST_HEAD(&th->t_list);
- get_fs_excl();
return 0;
out_fail:
@@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
flush = flags & FLUSH_ALL;
wait_on_commit = flags & WAIT;
- put_fs_excl();
current->journal_info = th->t_handle_save;
reiserfs_check_lock_depth(sb, "journal end");
if (journal->j_len == 0) {
@@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
dump_stack();
#endif
}
-
diff --git a/fs/super.c b/fs/super.c
index 7943f04cb3a9..3f56a269a4f4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -351,13 +351,11 @@ bool grab_super_passive(struct super_block *sb)
*/
void lock_super(struct super_block * sb)
{
- get_fs_excl();
mutex_lock(&sb->s_lock);
}
void unlock_super(struct super_block * sb)
{
- put_fs_excl();
mutex_unlock(&sb->s_lock);
}
@@ -385,7 +383,6 @@ void generic_shutdown_super(struct super_block *sb)
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
- get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;
fsnotify_unmount_inodes(&sb->s_inodes);
@@ -400,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb)
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
}
- put_fs_excl();
}
spin_lock(&sb_lock);
/* should be initialized for __put_super_and_need_restart() */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1a23722e8878..0e67c45b3bc9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -73,7 +73,7 @@ enum rq_cmd_type_bits {
/*
* try to put the fields that are referenced together in the same cacheline.
- * if you modify this structure, be sure to check block/blk-core.c:rq_init()
+ * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
* as well!
*/
struct request {
@@ -260,8 +260,7 @@ struct queue_limits {
unsigned char discard_zeroes_data;
};
-struct request_queue
-{
+struct request_queue {
/*
* Together with queue_head for cacheline sharing
*/
@@ -304,14 +303,14 @@ struct request_queue
void *queuedata;
/*
- * queue needs bounce pages for pages above this limit
+ * various queue flags, see QUEUE_* below
*/
- gfp_t bounce_gfp;
+ unsigned long queue_flags;
/*
- * various queue flags, see QUEUE_* below
+ * queue needs bounce pages for pages above this limit
*/
- unsigned long queue_flags;
+ gfp_t bounce_gfp;
/*
* protects queue structures from reentrancy. ->__queue_lock should
@@ -334,8 +333,8 @@ struct request_queue
unsigned int nr_congestion_off;
unsigned int nr_batching;
- void *dma_drain_buffer;
unsigned int dma_drain_size;
+ void *dma_drain_buffer;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
@@ -393,7 +392,7 @@ struct request_queue
#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */
+#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
@@ -403,6 +402,7 @@ struct request_queue
#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
+#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -857,12 +857,21 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
+/*
+ * Note: Code in between changing the blk_plug list/cb_list or element of such
+ * lists is preemptable, but such code can't do sleep (or be very careful),
+ * otherwise data is corrupted. For details, please check schedule() where
+ * blk_schedule_flush_plug() is called.
+ */
struct blk_plug {
unsigned long magic;
struct list_head list;
struct list_head cb_list;
unsigned int should_sort;
+ unsigned int count;
};
+#define BLK_MAX_REQUEST_COUNT 16
+
struct blk_plug_cb {
struct list_head list;
void (*callback)(struct blk_plug_cb *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 21a8ebf2dc3a..d800d5142184 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -146,7 +146,7 @@ extern struct request *elv_rb_latter_request(struct request_queue *, struct requ
/*
* rb support functions.
*/
-extern struct request *elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_add(struct rb_root *, struct request *);
extern void elv_rb_del(struct rb_root *, struct request *);
extern struct request *elv_rb_find(struct rb_root *, sector_t);
diff --git a/include/linux/fd.h b/include/linux/fd.h
index f5d194af07a8..72202b1b9a6a 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h
@@ -377,4 +377,26 @@ struct floppy_raw_cmd {
#define FDEJECT _IO(2, 0x5a)
/* eject the disk */
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+struct compat_floppy_struct {
+ compat_uint_t size;
+ compat_uint_t sect;
+ compat_uint_t head;
+ compat_uint_t track;
+ compat_uint_t stretch;
+ unsigned char gap;
+ unsigned char rate;
+ unsigned char spec1;
+ unsigned char fmt_gap;
+ const compat_caddr_t name;
+};
+
+#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
+#endif
+#endif
+
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b224dc468a23..0c35d6e767d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1469,10 +1469,6 @@ enum {
#define vfs_check_frozen(sb, level) \
wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
-#define get_fs_excl() atomic_inc(&current->fs_excl)
-#define put_fs_excl() atomic_dec(&current->fs_excl)
-#define has_fs_excl() atomic_read(&current->fs_excl)
-
/*
* until VFS tracks user namespaces for inodes, just make all files
* belong to init_user_ns
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 300d7582006e..02fa4697a0e5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -420,7 +420,7 @@ static inline int get_disk_ro(struct gendisk *disk)
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
-extern void disk_check_events(struct gendisk *disk);
+extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
/* drivers/char/random.c */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 580f70c02391..d14e058aaeed 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -176,7 +176,6 @@ extern struct cred init_cred;
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
- .fs_excl = ATOMIC_INIT(0), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2eee896dcbc..5037a0ad2312 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -5,6 +5,14 @@
#include <linux/rcupdate.h>
struct cfq_queue;
+struct cfq_ttime {
+ unsigned long last_end_request;
+
+ unsigned long ttime_total;
+ unsigned long ttime_samples;
+ unsigned long ttime_mean;
+};
+
struct cfq_io_context {
void *key;
@@ -12,11 +20,7 @@ struct cfq_io_context {
struct io_context *ioc;
- unsigned long last_end_request;
-
- unsigned long ttime_total;
- unsigned long ttime_samples;
- unsigned long ttime_mean;
+ struct cfq_ttime ttime;
struct list_head queue_list;
struct hlist_node cic_list;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed766add9b23..20b03bf94748 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1512,7 +1512,6 @@ struct task_struct {
short il_next;
short pref_node_fork;
#endif
- atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 73bb192a3d32..12ea415c6435 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -898,7 +898,6 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
- WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
diff --git a/kernel/fork.c b/kernel/fork.c
index aeae5b11b62e..17bf7c8d6511 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -290,7 +290,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
- atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f032e6e1e09a..2ef0dc9e7f39 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -505,7 +505,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
list_del_rcu(&bdi->bdi_list);
spin_unlock_bh(&bdi_lock);
- synchronize_rcu();
+ synchronize_rcu_expedited();
}
int bdi_register(struct backing_dev_info *bdi, struct device *parent,