From d7cfa4684d82f58e5d7cb73b8a3c88c169937f25 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 17 Dec 2008 00:20:45 -0500 Subject: ext4: display average commit time Display the average commit time (which is used by the ext4 fsync batching patch) in /proc/fs/jbd2/*/info for performance tuning purposes. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/jbd2/journal.c') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e70d657a19f8..74d87290381c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -40,6 +40,7 @@ #include #include +#include EXPORT_SYMBOL(jbd2_journal_start); EXPORT_SYMBOL(jbd2_journal_restart); @@ -824,6 +825,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); seq_printf(seq, " %ums logging transaction\n", jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); + seq_printf(seq, " %luus average transaction commit time\n", + do_div(s->journal->j_average_commit_time, 1000)); seq_printf(seq, " %lu handles per transaction\n", s->stats->u.run.rs_handle_count / s->stats->ts_tid); seq_printf(seq, " %lu blocks per transaction\n", -- cgit v1.2.3 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 29 +++++++++++++++++++++++ fs/ext4/ext4.h | 7 ++++++ fs/ext4/ext4_sb.h | 2 ++ fs/ext4/super.c | 47 ++++++++++++++++++++++++++++++++------ fs/jbd2/journal.c | 2 ++ fs/jbd2/transaction.c | 4 +++- include/linux/jbd2.h | 8 +++++++ 7 files changed, 91 insertions(+), 8 deletions(-) (limited to 'fs/jbd2/journal.c') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f75ab101c00a..e3fcbea3ec8c 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -283,6 +283,35 @@ delalloc (*) Deferring block allocation until write-out time. nodelalloc Disable delayed allocation. Blocks are allocation when data is copied from user to page cache. +max_batch_time=usec Maximum amount of time ext4 should wait for + additional filesystem operations to be batch + together with a synchronous write operation. + Since a synchronous write operation is going to + force a commit and then a wait for the I/O + complete, it doesn't cost much, and can be a + huge throughput win, we wait for a small amount + of time to see if any other transactions can + piggyback on the synchronous write. The + algorithm used is designed to automatically tune + for the speed of the disk, by measuring the + amount of time (on average) that it takes to + finish committing a transaction. Call this time + the "commit time". If the time that the + transactoin has been running is less than the + commit time, ext4 will try sleeping for the + commit time to see if other operations will join + the transaction. The commit time is capped by + the max_batch_time, which defaults to 15000us + (15ms). This optimization can be turned off + entirely by setting max_batch_time to 0. + +min_batch_time=usec This parameter sets the commit time (as + described above) to be at least min_batch_time. + It defaults to zero microseconds. Increasing + this parameter may improve the throughput of + multi-threaded, synchronous workloads on very + fast disks, at the cost of increasing latency. + Data Mode ========= There are 3 different data modes: diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ac8551e0b70a..9ba9fd6d14da 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -328,6 +328,7 @@ struct ext4_mount_options { uid_t s_resuid; gid_t s_resgid; unsigned long s_commit_interval; + u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA int s_jquota_fmt; char *s_qf_names[MAXQUOTAS]; @@ -805,6 +806,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEFM_JMODE_ORDERED 0x0040 #define EXT4_DEFM_JMODE_WBACK 0x0060 +/* + * Default journal batch times + */ +#define EXT4_DEF_MIN_BATCH_TIME 0 +#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ + /* * Structure of a directory entry */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 3db800f399a6..039b6ea1a042 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -74,6 +74,8 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; unsigned long s_commit_interval; + u32 s_max_batch_time; + u32 s_min_batch_time; struct block_device *journal_bdev; #ifdef CONFIG_JBD2_DEBUG struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dc27d4c613c0..da377f9521bb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -705,10 +705,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) #endif if (!test_opt(sb, RESERVATION)) seq_puts(seq, ",noreservation"); - if (sbi->s_commit_interval) { + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } + if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { + seq_printf(seq, ",min_batch_time=%u", + (unsigned) sbi->s_min_batch_time); + } + if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { + seq_printf(seq, ",max_batch_time=%u", + (unsigned) sbi->s_min_batch_time); + } + /* * We're changing the default of barrier mount option, so * let's always display its mount state so it's clear what its @@ -874,7 +883,8 @@ enum { Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, - Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_commit, Opt_min_batch_time, Opt_max_batch_time, + Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, @@ -913,6 +923,8 @@ static const match_table_t tokens = { {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, + {Opt_min_batch_time, "min_batch_time=%u"}, + {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, @@ -1131,6 +1143,22 @@ static int parse_options(char *options, struct super_block *sb, option = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * option; break; + case Opt_max_batch_time: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + if (option == 0) + option = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_max_batch_time = option; + break; + case Opt_min_batch_time: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_min_batch_time = option; + break; case Opt_data_journal: data_opt = EXT4_MOUNT_JOURNAL_DATA; goto datacheck; @@ -1979,6 +2007,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; + sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; + sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; set_opt(sbi->s_mount_opt, RESERVATION); set_opt(sbi->s_mount_opt, BARRIER); @@ -2524,11 +2555,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) { struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sbi->s_commit_interval) - journal->j_commit_interval = sbi->s_commit_interval; - /* We could also set up an ext4-specific default for the commit - * interval here, but for now we'll just fall back to the jbd - * default. */ + journal->j_commit_interval = sbi->s_commit_interval; + journal->j_min_batch_time = sbi->s_min_batch_time; + journal->j_max_batch_time = sbi->s_max_batch_time; spin_lock(&journal->j_state_lock); if (test_opt(sb, BARRIER)) @@ -3042,6 +3071,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_resuid = sbi->s_resuid; old_opts.s_resgid = sbi->s_resgid; old_opts.s_commit_interval = sbi->s_commit_interval; + old_opts.s_min_batch_time = sbi->s_min_batch_time; + old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA old_opts.s_jquota_fmt = sbi->s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) @@ -3178,6 +3209,8 @@ restore_opts: sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sbi->s_commit_interval = old_opts.s_commit_interval; + sbi->s_min_batch_time = old_opts.s_min_batch_time; + sbi->s_max_batch_time = old_opts.s_max_batch_time; #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 74d87290381c..fd1d7557a098 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -964,6 +964,8 @@ static journal_t * journal_init_common (void) spin_lock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); + journal->j_min_batch_time = 0; + journal->j_max_batch_time = 15000; /* 15ms */ /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 13dcbc990f41..48c21bac5a56 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1255,8 +1255,10 @@ int jbd2_journal_stop(handle_t *handle) trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); + commit_time = max_t(u64, commit_time, + 1000*journal->j_min_batch_time); commit_time = min_t(u64, commit_time, - 1000*jiffies_to_usecs(1)); + 1000*journal->j_max_batch_time); if (trans_time < commit_time) { ktime_t expires = ktime_add_ns(ktime_get(), diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c28..a3cd647ea1bc 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -956,6 +956,14 @@ struct journal_s */ u64 j_average_commit_time; + /* + * minimum and maximum times that we should wait for + * additional filesystem operations to get batched into a + * synchronous handle in microseconds + */ + u32 j_min_batch_time; + u32 j_max_batch_time; + /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- cgit v1.2.3 From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:09:22 -0500 Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint routine jbd2_log_do_checkpoint()n is one of the kernel's largest stack users. Move the array of buffer head's from the stack of jbd2_log_do_checkpoint() to the in-core journal structure. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 22 +++++++++------------- fs/jbd2/journal.c | 2 ++ include/linux/jbd2.h | 10 ++++++++++ 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs/jbd2/journal.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9497718fe920..adc08ec875ed 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -249,16 +249,14 @@ restart: return ret; } -#define NR_BATCH 64 - static void -__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) +__flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); for (i = 0; i < *batch_count; i++) { - struct buffer_head *bh = bhs[i]; + struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); BUFFER_TRACE(bh, "brelse"); __brelse(bh); @@ -277,8 +275,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - transaction_t *transaction) + int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; @@ -325,14 +322,14 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); - bhs[*batch_count] = bh; + journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); jbd_unlock_bh_state(bh); transaction->t_chp_stats.cs_written++; (*batch_count)++; - if (*batch_count == NR_BATCH) { + if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); - __flush_batch(journal, bhs, batch_count); + __flush_batch(journal, batch_count); ret = 1; } } @@ -388,7 +385,6 @@ restart: if (journal->j_checkpoint_transactions == transaction && transaction->t_tid == this_tid) { int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; int retry = 0, err; @@ -402,7 +398,7 @@ restart: retry = 1; break; } - retry = __process_buffer(journal, jh, bhs, &batch_count, + retry = __process_buffer(journal, jh, &batch_count, transaction); if (retry < 0 && !result) result = retry; @@ -419,7 +415,7 @@ restart: spin_unlock(&journal->j_list_lock); retry = 1; } - __flush_batch(journal, bhs, &batch_count); + __flush_batch(journal, &batch_count); } if (retry) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fd1d7557a098..34ef98057202 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1477,7 +1477,9 @@ int jbd2_journal_destroy(journal_t *journal) spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); jbd2_log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a3cd647ea1bc..004c9a8d63ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,6 +687,8 @@ jbd2_time_diff(unsigned long start, unsigned long end) return end + (MAX_JIFFY_OFFSET - start); } +#define JBD2_NR_BATCH 64 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -830,6 +832,14 @@ struct journal_s /* Semaphore for locking against concurrent checkpoints */ struct mutex j_checkpoint_mutex; + /* + * List of buffer heads used by the checkpoint routine. This + * was moved from jbd2_log_do_checkpoint() to reduce stack + * usage. Access to this array is controlled by the + * j_checkpoint_mutex. [j_checkpoint_mutex] + */ + struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; + /* * Journal head: identifies the first unused block in the journal. * [j_state_lock] -- cgit v1.2.3 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete in quite some time, since the supported method for adding a journal inode is to use tune2fs (or to creating new filesystem with a journal via mke2fs or mkfs.ext4). Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 4 --- fs/ext4/super.c | 68 +++-------------------------------- fs/jbd2/journal.c | 72 -------------------------------------- include/linux/jbd2.h | 1 - 4 files changed, 4 insertions(+), 141 deletions(-) (limited to 'fs/jbd2/journal.c') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index e3fcbea3ec8c..9ec29d86ff8b 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -149,10 +149,6 @@ journal_async_commit Commit block can be written to disk without waiting journal=update Update the ext4 file system's journal to the current format. -journal=inum When a journal already exists, this option is ignored. - Otherwise, it specifies the number of the inode which - will represent the ext4 file system's journal file. - journal_dev=devnum When the external journal device's major/minor numbers have changed, this option allows the user to specify the new journal location. The journal device is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5ab520724da..8036392b2121 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -51,8 +51,6 @@ struct proc_dir_entry *ext4_proc_root; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext4_create_journal(struct super_block *, struct ext4_super_block *, - unsigned int); static void ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, @@ -1006,7 +1004,7 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, - Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, @@ -1048,7 +1046,6 @@ static const match_table_t tokens = { {Opt_min_batch_time, "min_batch_time=%u"}, {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, {Opt_journal_checksum, "journal_checksum"}, {Opt_journal_async_commit, "journal_async_commit"}, @@ -1102,7 +1099,7 @@ static ext4_fsblk_t get_sb_block(void **data) } static int parse_options(char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, + unsigned long *journal_devnum, ext4_fsblk_t *n_blocks_count, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1226,16 +1223,6 @@ static int parse_options(char *options, struct super_block *sb, } set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; - case Opt_journal_inum: - if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); - return 0; - } - if (match_int(&args[0], &option)) - return 0; - *inum = option; - break; case Opt_journal_dev: if (is_remount) { printk(KERN_ERR "EXT4-fs: cannot specify " @@ -2035,7 +2022,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; unsigned long offset = 0; - unsigned int journal_inum = 0; unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; @@ -2155,8 +2141,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) + if (!parse_options((char *) data, sb, &journal_devnum, NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -2460,9 +2445,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } } - } else if (journal_inum) { - if (ext4_create_journal(sb, es, journal_inum)) - goto failed_mount3; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { printk(KERN_ERR "EXT4-fs: required journal recovery " @@ -2926,48 +2908,6 @@ static int ext4_load_journal(struct super_block *sb, return 0; } -static int ext4_create_journal(struct super_block *sb, - struct ext4_super_block *es, - unsigned int journal_inum) -{ - journal_t *journal; - int err; - - if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " - "create journal.\n"); - return -EROFS; - } - - journal = ext4_get_journal(sb, journal_inum); - if (!journal) - return -EINVAL; - - printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", - journal_inum); - - err = jbd2_journal_create(journal); - if (err) { - printk(KERN_ERR "EXT4-fs: error creating journal.\n"); - jbd2_journal_destroy(journal); - return -EIO; - } - - EXT4_SB(sb)->s_journal = journal; - - ext4_update_dynamic_rev(sb); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); - - es->s_journal_inum = cpu_to_le32(journal_inum); - sb->s_dirt = 1; - - /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); - - return 0; -} - static void ext4_commit_super(struct super_block *sb, struct ext4_super_block *es, int sync) { @@ -3209,7 +3149,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { + if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { err = -EINVAL; goto restore_opts; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 34ef98057202..b10d7283ba5b 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,7 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format); EXPORT_SYMBOL(jbd2_journal_check_used_features); EXPORT_SYMBOL(jbd2_journal_check_available_features); EXPORT_SYMBOL(jbd2_journal_set_features); -EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_destroy); EXPORT_SYMBOL(jbd2_journal_abort); @@ -1162,77 +1161,6 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -/** - * int jbd2_journal_create() - Initialise the new journal file - * @journal: Journal to create. This structure must have been initialised - * - * Given a journal_t structure which tells us which disk blocks we can - * use, create a new journal superblock and initialise all of the - * journal fields from scratch. - **/ -int jbd2_journal_create(journal_t *journal) -{ - unsigned long long blocknr; - struct buffer_head *bh; - journal_superblock_t *sb; - int i, err; - - if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { - printk (KERN_ERR "Journal length (%d blocks) too short.\n", - journal->j_maxlen); - journal_fail_superblock(journal); - return -EINVAL; - } - - if (journal->j_inode == NULL) { - /* - * We don't know what block to start at! - */ - printk(KERN_EMERG - "%s: creation of journal on external device!\n", - __func__); - BUG(); - } - - /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ - jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); - for (i = 0; i < journal->j_maxlen; i++) { - err = jbd2_journal_bmap(journal, i, &blocknr); - if (err) - return err; - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - lock_buffer(bh); - memset (bh->b_data, 0, journal->j_blocksize); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - __brelse(bh); - } - - sync_blockdev(journal->j_dev); - jbd_debug(1, "JBD: journal cleared.\n"); - - /* OK, fill in the initial static fields in the new superblock */ - sb = journal->j_superblock; - - sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); - - sb->s_blocksize = cpu_to_be32(journal->j_blocksize); - sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); - - journal->j_transaction_sequence = 1; - - journal->j_flags &= ~JBD2_ABORT; - journal->j_format_version = 2; - - return journal_reset(journal); -} - /** * void jbd2_journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9d82084a1605..adef1c9940d3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1104,7 +1104,6 @@ extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); -- cgit v1.2.3 From 4a9bf99b205448ec1f0cbdee1776a29f9c503ce4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 22:56:44 -0500 Subject: jbd2: Add pid and journal device name to the "kjournald2 starting" message Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/jbd2/journal.c') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b10d7283ba5b..fe20e40ee7c3 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -131,8 +131,9 @@ static int kjournald2(void *arg) journal->j_task = current; wake_up(&journal->j_wait_done_commit); - printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", - journal->j_commit_interval / HZ); + printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, " + "commit interval %ld seconds\n", current->pid, + journal->j_devname, journal->j_commit_interval / HZ); /* * And now, wait forever for commit wakeup events. -- cgit v1.2.3 From 4b905671d2ea09fd48fed72c581df17e40823f39 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Jan 2009 14:53:35 -0500 Subject: jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs On 32-bit system with CONFIG_LBD getblk can fail because provided block number is too big. Add error checks so we fail gracefully if getblk() returns NULL (which can also happen on memory allocation failures). Thanks to David Maciejak from Fortinet's FortiGuard Global Security Research Team for reporting this bug. http://bugzilla.kernel.org/show_bug.cgi?id=12370 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" cc: stable@kernel.org --- fs/jbd2/journal.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'fs/jbd2/journal.c') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fe20e40ee7c3..2932c8f55199 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -632,6 +632,8 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) return NULL; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return NULL; lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); set_buffer_uptodate(bh); @@ -1021,15 +1023,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, /* journal descriptor can store up to n blocks -bzzz */ journal->j_blocksize = blocksize; + jbd2_stats_proc_init(journal); n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); - kfree(journal); - journal = NULL; - goto out; + goto out_err; } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; @@ -1039,14 +1040,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, p = journal->j_devname; while ((p = strchr(p, '/'))) *p = '!'; - jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; -out: + return journal; +out_err: + jbd2_stats_proc_exit(journal); + kfree(journal); + return NULL; } /** @@ -1094,9 +1103,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; + goto out_err; } err = jbd2_journal_bmap(journal, 0, &blocknr); @@ -1104,17 +1111,24 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", __func__); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; + goto out_err; } bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; +out_err: + jbd2_stats_proc_exit(journal); + kfree(journal); + return NULL; } /* -- cgit v1.2.3