From bca4b914b5da3d8e7b9b647f620b71dc85c0c394 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 20 May 2010 23:21:34 +0400 Subject: cfq-iosched: remove dead_key from cfq_io_context Remove ->dead_key field from cfq_io_context to shrink its size to 128 bytes. (64 bytes for 32-bit hosts) Use lower bit in ->key as dead-mark, instead of moving key to separate field. After this for dead cfq_io_context we got cic->key != cfqd automatically. Thus, io_context's last-hit cache should work without changing. Now to check ->key for non-dead state compare it with cfqd, instead of checking ->key for non-null value as it was before. Plus remove obsolete race protection in cfq_cic_lookup. This race gone after v2.6.24-1728-g4ac845a Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a0bb301afac0..64d529133031 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -7,7 +7,6 @@ struct cfq_queue; struct cfq_io_context { void *key; - unsigned long dead_key; struct cfq_queue *cfqq[2]; -- cgit v1.2.3 From 0e3c9a2284f5417f196e327c254d0b84c9ee8929 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Jun 2010 11:08:43 +0200 Subject: Revert "writeback: fix WB_SYNC_NONE writeback from umount" This reverts commit e913fc825dc685a444cb4c1d0f9d32f372f59861. We are investigating a hang associated with the WB_SYNC_NONE changes, so revert them for now. Conflicts: fs/fs-writeback.c mm/page-writeback.c Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 48 +++++++++++---------------------------------- fs/sync.c | 2 +- include/linux/backing-dev.h | 2 +- include/linux/writeback.h | 10 ---------- mm/page-writeback.c | 4 ++-- 5 files changed, 15 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6753912641b4..408a7877b79d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,7 +45,6 @@ struct wb_writeback_args { unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; - unsigned int sb_pinned:1; }; /* @@ -231,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, - /* - * Setting sb_pinned is not necessary for WB_SYNC_ALL, but - * lets make it explicitly clear. - */ - .sb_pinned = 1, }; struct bdi_work work; @@ -251,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * @bdi: the backing device to write from * @sb: write inodes from this super_block * @nr_pages: the number of pages to write - * @sb_locked: caller already holds sb umount sem. * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only * started when this function returns, we make no guarentees on - * completion. Caller specifies whether sb umount sem is held already or not. + * completion. Caller need not hold sb s_umount semaphore. * */ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages, int sb_locked) + long nr_pages) { struct wb_writeback_args args = { .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, - .sb_pinned = sb_locked, }; /* @@ -592,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, /* * Caller must already hold the ref for this */ - if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { + if (wbc->sync_mode == WB_SYNC_ALL) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); return SB_NOT_PINNED; } @@ -766,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb, .for_kupdate = args->for_kupdate, .for_background = args->for_background, .range_cyclic = args->range_cyclic, - .sb_pinned = args->sb_pinned, }; unsigned long oldest_jif; long wrote = 0; @@ -1214,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } -static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) -{ - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; - - nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - - bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); -} - /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1237,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) */ void writeback_inodes_sb(struct super_block *sb) { - __writeback_inodes_sb(sb, 0); -} -EXPORT_SYMBOL(writeback_inodes_sb); + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); + long nr_to_write; -/** - * writeback_inodes_sb_locked - writeback dirty inodes from given super_block - * @sb: the superblock - * - * Like writeback_inodes_sb(), except the caller already holds the - * sb umount sem. - */ -void writeback_inodes_sb_locked(struct super_block *sb) -{ - __writeback_inodes_sb(sb, 1); + nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + + bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } +EXPORT_SYMBOL(writeback_inodes_sb); /** * writeback_inodes_sb_if_idle - start writeback if none underway diff --git a/fs/sync.c b/fs/sync.c index e8cbd415e50a..5a537ccd2e85 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (wait) sync_inodes_sb(sb); else - writeback_inodes_sb_locked(sb); + writeback_inodes_sb(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e6e0cb5437e6..aee5f6ce166e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages, int sb_locked); + long nr_pages); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index cc97d6caf2b3..f64134653a8c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -65,15 +65,6 @@ struct writeback_control { * so we use a single control to update them */ unsigned no_nrwrite_index_update:1; - - /* - * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, - * the writeback code will pin the sb for the caller. However, - * for eg umount, the caller does WB_SYNC_NONE but already has - * the sb pinned. If the below is set, caller already has the - * sb pinned. - */ - unsigned sb_pinned:1; }; /* @@ -82,7 +73,6 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); -void writeback_inodes_sb_locked(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b289310e2c89..5fa63bdf52e4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0, 0); + bdi_start_writeback(bdi, NULL, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -707,7 +707,7 @@ void laptop_mode_timer_fn(unsigned long data) */ if (bdi_has_dirty_io(&q->backing_dev_info)) - bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); + bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); } /* -- cgit v1.2.3 From 099c5c310e9744bd0654881bb55c137051228e56 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 May 2010 13:46:35 +0200 Subject: Preparing 8.3.8rc2 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 68530521ad00..30da4ae48972 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8rc1" +#define REL_VERSION "8.3.8rc2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 94 -- cgit v1.2.3 From 28f4197e5d4707311febeec8a0eb97cb5fd93c97 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Jun 2010 12:23:18 +0200 Subject: block: disable preemption before using sched_clock() Commit 9195291e5f05e01d67f9a09c756b8aca8f009089 added calls to sched_clock() from preemptible code. sched_clock() is both the wrong interface AND cannot be called without preempt disabled. Apply a temporary fix to get rid of the warnings, a real patch is in the works. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8b7f5e0914ad..09a840264d6f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1211,14 +1211,23 @@ struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); #ifdef CONFIG_BLK_CGROUP +/* + * This should not be using sched_clock(). A real patch is in progress + * to fix this up, until that is in place we need to disable preemption + * around sched_clock() in this function and set_io_start_time_ns(). + */ static inline void set_start_time_ns(struct request *req) { + preempt_disable(); req->start_time_ns = sched_clock(); + preempt_enable(); } static inline void set_io_start_time_ns(struct request *req) { + preempt_disable(); req->io_start_time_ns = sched_clock(); + preempt_enable(); } static inline uint64_t rq_start_time_ns(struct request *req) -- cgit v1.2.3 From ff9da691c0498ff81fdd014e7a0731dab2337dac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jun 2010 14:54:39 +0200 Subject: pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface This changes the interface to be based on bytes instead. The API matches that of F_SETPIPE_SZ in that it rounds up the passed in size so that the resulting page array is a power-of-2 in size. The proc file is renamed to /proc/sys/fs/pipe-max-size to reflect this change. Signed-off-by: Jens Axboe --- fs/pipe.c | 54 ++++++++++++++++++++++++++++++++++++----------- include/linux/pipe_fs_i.h | 4 +++- kernel/sysctl.c | 8 +++---- 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index f98fae3e36b0..69c4c7c13ea9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -26,9 +26,14 @@ /* * The max size that a non-root user is allowed to grow the pipe. Can - * be set by root in /proc/sys/fs/pipe-max-pages + * be set by root in /proc/sys/fs/pipe-max-size */ -unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; +unsigned int pipe_max_size = 1048576; + +/* + * Minimum pipe size, as required by POSIX + */ +unsigned int pipe_min_size = PAGE_SIZE; /* * We use a start+len construction, which provides full use of the @@ -1156,6 +1161,35 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) return nr_pages * PAGE_SIZE; } +/* + * Currently we rely on the pipe array holding a power-of-2 number + * of pages. + */ +static inline unsigned int round_pipe_size(unsigned int size) +{ + unsigned long nr_pages; + + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; +} + +/* + * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax + * will return an error. + */ +int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); + if (ret < 0 || !write) + return ret; + + pipe_max_size = round_pipe_size(pipe_max_size); + return ret; +} + long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe; @@ -1169,23 +1203,19 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case F_SETPIPE_SZ: { - unsigned long nr_pages; + unsigned int size, nr_pages; - /* - * Currently the array must be a power-of-2 size, so adjust - * upwards if needed. - */ - nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; - nr_pages = roundup_pow_of_two(nr_pages); + size = round_pipe_size(arg); + nr_pages = size >> PAGE_SHIFT; - if (!capable(CAP_SYS_RESOURCE) && nr_pages > pipe_max_pages) { + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { ret = -EPERM; goto out; - } else if (nr_pages < 1) { + } else if (nr_pages < PAGE_SIZE) { ret = -EINVAL; goto out; } - ret = pipe_set_size(pipe, arg); + ret = pipe_set_size(pipe, nr_pages); break; } case F_GETPIPE_SZ: diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 16de3933c45e..445796945ac9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -139,7 +139,9 @@ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); -extern unsigned int pipe_max_pages; +extern unsigned int pipe_max_size, pipe_min_size; +int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 997080f00e0b..d24f761f4876 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = { }, #endif { - .procname = "pipe-max-pages", - .data = &pipe_max_pages, + .procname = "pipe-max-size", + .data = &pipe_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &two, + .proc_handler = &pipe_proc_fn, + .extra1 = &pipe_min_size, }, /* * NOTE: do not add new entries to this table unless you have read -- cgit v1.2.3