diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-02 19:33:21 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-02 19:33:21 -0800 |
commit | a7c1120d2dcc83691bafa034d98f70285757e826 (patch) | |
tree | 56779f292c2dea78feff290c3ce26724d33b3f3e /fs/ext4 | |
parent | 6ec40b423032ca769c96fbf9a080db822821893d (diff) | |
parent | 9b2ff35753c0512bc8c6adae9e9c87cbeee86f82 (diff) | |
download | lwn-a7c1120d2dcc83691bafa034d98f70285757e826.tar.gz lwn-a7c1120d2dcc83691bafa034d98f70285757e826.zip |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 bug fixes from Ted Ts'o:
"Various bug fixes for ext4. The most important is a fix for the new
extent cache's slab shrinker which can cause significant, user-visible
pauses when the system is under memory pressure."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: enable quotas before orphan cleanup
ext4: don't allow quota mount options when quota feature enabled
ext4: fix a warning from sparse check for ext4_dir_llseek
ext4: convert number of blocks to clusters properly
ext4: fix possible memory leak in ext4_remount()
jbd2: fix ERR_PTR dereference in jbd2__journal_start
ext4: use percpu counter for extent cache count
ext4: optimize ext4_es_shrink()
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 2 |
-rw-r--r-- | fs/ext4/dir.c | 2 |
-rw-r--r-- | fs/ext4/ext4.h | 1 |
-rw-r--r-- | fs/ext4/extents_status.c | 39 |
-rw-r--r-- | fs/ext4/mballoc.c | 8 |
-rw-r--r-- | fs/ext4/resize.c | 6 |
-rw-r--r-- | fs/ext4/super.c | 61 |
7 files changed, 66 insertions, 53 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 2f2e0da1a6b7..92e68b33fffd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -635,7 +635,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) brelse(bitmap_bh); printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" ", computed = %llu, %llu\n", - EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), + EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 6dda04f05ef4..d8cd1f0f4661 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -334,7 +334,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) * * For non-htree, ext4_llseek already chooses the proper max offset. */ -loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) +static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6e16c1867959..4a01ba315262 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1309,6 +1309,7 @@ struct ext4_sb_info { /* Reclaim extents from extent status tree */ struct shrinker s_es_shrinker; struct list_head s_es_lru; + struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; }; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index f768f4a98a2b..95796a1b7522 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -147,11 +147,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t end); static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, int nr_to_scan); -static int ext4_es_reclaim_extents_count(struct super_block *sb); int __init ext4_init_es(void) { - ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); + ext4_es_cachep = kmem_cache_create("ext4_extent_status", + sizeof(struct extent_status), + 0, 
(SLAB_RECLAIM_ACCOUNT), NULL); if (ext4_es_cachep == NULL) return -ENOMEM; return 0; @@ -302,8 +303,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, /* * We don't count delayed extent because we never try to reclaim them */ - if (!ext4_es_is_delayed(es)) + if (!ext4_es_is_delayed(es)) { EXT4_I(inode)->i_es_lru_nr++; + percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); + } return es; } @@ -314,6 +317,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) if (!ext4_es_is_delayed(es)) { BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); EXT4_I(inode)->i_es_lru_nr--; + percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); } kmem_cache_free(ext4_es_cachep, es); @@ -674,10 +678,11 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk = 0; - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan); + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); if (!nr_to_scan) - return ext4_es_reclaim_extents_count(sbi->s_sb); + return ret; INIT_LIST_HEAD(&scanned); @@ -705,9 +710,10 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) } list_splice_tail(&scanned, &sbi->s_es_lru); spin_unlock(&sbi->s_es_lru_lock); - trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk); - return ext4_es_reclaim_extents_count(sbi->s_sb); + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); + return ret; } void ext4_es_register_shrinker(struct super_block *sb) @@ -751,25 +757,6 @@ void ext4_es_lru_del(struct inode *inode) spin_unlock(&sbi->s_es_lru_lock); } -static int ext4_es_reclaim_extents_count(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_inode_info *ei; - struct list_head *cur; - int nr_cached = 0; - - spin_lock(&sbi->s_es_lru_lock); - list_for_each(cur, 
&sbi->s_es_lru) { - ei = list_entry(cur, struct ext4_inode_info, i_es_lru); - read_lock(&ei->i_es_lock); - nr_cached += ei->i_es_lru_nr; - read_unlock(&ei->i_es_lock); - } - spin_unlock(&sbi->s_es_lru_lock); - trace_ext4_es_reclaim_extents_count(sb, nr_cached); - return nr_cached; -} - static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, int nr_to_scan) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6540ebe058e3..7bb713a46fe4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3419,7 +3419,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) win = offs; ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - - EXT4_B2C(sbi, win); + EXT4_NUM_B2C(sbi, win); BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); } @@ -4565,7 +4565,7 @@ do_more: EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; } - count_clusters = EXT4_B2C(sbi, count); + count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) { err = -EIO; @@ -4807,11 +4807,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_group_desc_csum_set(sb, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, blocks_freed)); + EXT4_NUM_B2C(sbi, blocks_freed)); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(EXT4_B2C(sbi, blocks_freed), + atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), &sbi->s_flex_groups[flex_group].free_clusters); } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c7f4d7584669..b2c8ee56eb98 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1247,7 +1247,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, - EXT4_B2C(sbi, group_data->free_blocks_count)); + EXT4_NUM_B2C(sbi, 
group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); if (ext4_has_group_desc_csum(sb)) ext4_itable_unused_set(sb, gdp, @@ -1349,7 +1349,7 @@ static void ext4_update_super(struct super_block *sb, /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, free_blocks)); + EXT4_NUM_B2C(sbi, free_blocks)); percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); @@ -1360,7 +1360,7 @@ static void ext4_update_super(struct super_block *sb, sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); - atomic_add(EXT4_B2C(sbi, free_blocks), + atomic_add(EXT4_NUM_B2C(sbi, free_blocks), &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &sbi->s_flex_groups[flex_group].free_inodes); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 620cf5615ba2..5e6c87836193 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -783,6 +783,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_extent_cache_cnt); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -1247,6 +1248,11 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quota options when quota turned on"); return -1; } + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " + "when QUOTA feature is enabled"); + return -1; + } qname = match_strdup(args); if (!qname) { ext4_msg(sb, KERN_ERR, @@ -1544,6 +1550,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "quota options when quota turned on"); return -1; } + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_QUOTA)) { 
+ ext4_msg(sb, KERN_ERR, + "Cannot set journaled quota options " + "when QUOTA feature is enabled"); + return -1; + } sbi->s_jquota_fmt = m->mount_opt; #endif } else { @@ -1592,6 +1605,12 @@ static int parse_options(char *options, struct super_block *sb, return 0; } #ifdef CONFIG_QUOTA + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { + ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " + "feature is enabled"); + return 0; + } if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); @@ -3161,7 +3180,7 @@ int ext4_calculate_overhead(struct super_block *sb) } /* Add the journal blocks as well */ if (sbi->s_journal) - overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen); + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); sbi->s_overhead = overhead; smp_wmb(); @@ -3688,6 +3707,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!err) { err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); } + if (!err) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0); + } if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; @@ -3711,13 +3733,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA - sb->s_qcop = &ext4_qctl_operations; sb->dq_op = &ext4_quota_operations; - - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { - /* Use qctl operations for hidden quota files. 
*/ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) sb->s_qcop = &ext4_qctl_sysfile_operations; - } + else + sb->s_qcop = &ext4_qctl_operations; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); @@ -3913,6 +3933,16 @@ no_journal: if (err) goto failed_mount7; +#ifdef CONFIG_QUOTA + /* Enable quota usage during mount. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + !(sb->s_flags & MS_RDONLY)) { + err = ext4_enable_quotas(sb); + if (err) + goto failed_mount8; + } +#endif /* CONFIG_QUOTA */ + EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; @@ -3930,16 +3960,6 @@ no_journal: } else descr = "out journal"; -#ifdef CONFIG_QUOTA - /* Enable quota usage during mount. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !(sb->s_flags & MS_RDONLY)) { - err = ext4_enable_quotas(sb); - if (err) - goto failed_mount8; - } -#endif /* CONFIG_QUOTA */ - if (test_opt(sb, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); if (!blk_queue_discard(q)) @@ -3993,6 +4013,7 @@ failed_mount3: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_extent_cache_cnt); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -4538,6 +4559,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!old_opts.s_qf_names[i]) { for (j = 0; j < i; j++) kfree(old_opts.s_qf_names[j]); + kfree(orig_data); return -ENOMEM; } } else @@ -4816,9 +4838,12 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { + struct super_block *sb = dquot->dq_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + /* Are we journaling quotas? 
*/ - if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || + sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); } else { |