From 9e52484c713321e84e8834803a44ca0a001376d2 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 15 Apr 2020 16:31:39 -0400 Subject: ext4: remove EXT4_GET_BLOCKS_KEEP_SIZE flag The eofblocks code was removed in the 5.7 release by "ext4: remove EOFBLOCKS_FL and associated code" (4337ecd1fe99). The ext4_map_blocks() flag used to trigger it can now be removed as well. Signed-off-by: Eric Whitney Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200415203140.30349-2-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 91eb4381cae5..c8d060627448 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -609,8 +609,6 @@ enum { #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 /* Don't normalize allocation size (used for fallocate) */ #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 - /* Request will not result in inode size update (user for fallocate) */ -#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 /* Convert written extents to unwritten */ #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 /* Write zeros to newly created written extents */ -- cgit v1.2.3 From 4209ae12b12265d475bba28634184423149bd14f Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Sun, 26 Apr 2020 18:34:37 -0700 Subject: ext4: handle ext4_mark_inode_dirty errors ext4_mark_inode_dirty() can fail for real reasons. Ignoring its return value may lead ext4 to ignore real failures that would result in corruption / crashes. Harden ext4_mark_inode_dirty error paths to fail as soon as possible and return errors to the caller whenever appropriate. One of the possible scnearios when this bug could affected is that while creating a new inode, its directory entry gets added successfully but while writing the inode itself mark_inode_dirty returns error which is ignored. This would result in inconsistency that the directory entry points to a non-existent inode. Ran gce-xfstests smoke tests and verified that there were no regressions. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20200427013438.219117-1-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/acl.c | 2 +- fs/ext4/ext4.h | 2 +- fs/ext4/ext4_jbd2.h | 5 +++- fs/ext4/extents.c | 34 ++++++++++++++---------- fs/ext4/file.c | 11 ++++++-- fs/ext4/indirect.c | 4 ++- fs/ext4/inline.c | 6 +++-- fs/ext4/inode.c | 38 ++++++++++++++++++--------- fs/ext4/migrate.c | 12 ++++++--- fs/ext4/namei.c | 76 ++++++++++++++++++++++++++++++++++------------------- fs/ext4/super.c | 16 ++++++----- fs/ext4/xattr.c | 6 +++-- 12 files changed, 139 insertions(+), 73 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index b3eba92f38f5..76f634d185f1 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -255,7 +255,7 @@ retry: if (!error && update_mode) { inode->i_mode = mode; inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + error = ext4_mark_inode_dirty(handle, inode); } out_stop: ext4_journal_stop(handle); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c8d060627448..884ce3086486 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3352,7 +3352,7 @@ struct ext4_extent; */ #define EXT_MAX_BLOCKS 0xffffffff -extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern void ext4_ext_tree_init(handle_t *handle, struct inode *inode); extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 4b9002f0e84c..3bacf76d2609 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -222,7 +222,10 @@ ext4_mark_iloc_dirty(handle_t *handle, int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc); -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); +#define ext4_mark_inode_dirty(__h, __i) \ + __ext4_mark_inode_dirty((__h), (__i), __func__, __LINE__) +int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, + const char *func, unsigned int line); int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5809891eadab..49ea6973d65f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -816,7 +816,7 @@ ext4_ext_binsearch(struct inode *inode, } -int ext4_ext_tree_init(handle_t *handle, struct inode *inode) +void ext4_ext_tree_init(handle_t *handle, struct inode *inode) { struct ext4_extent_header *eh; @@ -826,7 +826,6 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) eh->eh_magic = EXT4_EXT_MAGIC; eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); ext4_mark_inode_dirty(handle, inode); - return 0; } struct ext4_ext_path * @@ -1319,7 +1318,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_idx_pblock(EXT_FIRST_INDEX(neh))); le16_add_cpu(&neh->eh_depth, 1); - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); out: brelse(bh); @@ -4363,7 +4362,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, struct inode *inode = file_inode(file); handle_t *handle; int ret = 0; - int ret2 = 0; + int ret2 = 0, ret3 = 0; int retries = 0; int depth = 0; struct ext4_map_blocks map; @@ -4423,10 +4422,11 @@ retry: if (ext4_update_inode_size(inode, epos) & 0x1) inode->i_mtime = inode->i_ctime; } - ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); - ret2 = ext4_journal_stop(handle); - if (ret2) + ret3 = ext4_journal_stop(handle); + ret2 = ret3 ? ret3 : ret2; + if (unlikely(ret2)) break; } if (ret == -ENOSPC && @@ -4575,7 +4575,9 @@ static long ext4_zero_range(struct file *file, loff_t offset, inode->i_mtime = inode->i_ctime = current_time(inode); if (new_size) ext4_update_inode_size(inode, new_size); - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret)) + goto out_handle; /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); @@ -4585,6 +4587,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (file->f_flags & O_SYNC) ext4_handle_sync(handle); +out_handle: ext4_journal_stop(handle); out_mutex: inode_unlock(inode); @@ -4696,8 +4699,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, loff_t offset, ssize_t len) { unsigned int max_blocks; - int ret = 0; - int ret2 = 0; + int ret = 0, ret2 = 0, ret3 = 0; struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; unsigned int credits = 0; @@ -4730,9 +4732,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, "ext4_ext_map_blocks returned %d", inode->i_ino, map.m_lblk, map.m_len, ret); - ext4_mark_inode_dirty(handle, inode); - if (credits) - ret2 = ext4_journal_stop(handle); + ret2 = ext4_mark_inode_dirty(handle, inode); + if (credits) { + ret3 = ext4_journal_stop(handle); + if (unlikely(ret3)) + ret2 = ret3; + } + if (ret <= 0 || ret2) break; } @@ -5269,7 +5275,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (IS_SYNC(inode)) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0d624250a62b..b8e69f9e3858 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -287,6 +287,7 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, bool truncate = false; u8 blkbits = inode->i_blkbits; ext4_lblk_t written_blk, end_blk; + int ret; /* * Note that EXT4_I(inode)->i_disksize can get extended up to @@ -327,8 +328,14 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, goto truncate; } - if (ext4_update_inode_size(inode, offset + written)) - ext4_mark_inode_dirty(handle, inode); + if (ext4_update_inode_size(inode, offset + written)) { + ret = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret)) { + written = ret; + ext4_journal_stop(handle); + goto truncate; + } + } /* * We may need to truncate allocated but not written blocks beyond EOF. diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 107f0043f67f..be2b66eb65f7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -467,7 +467,9 @@ static int ext4_splice_branch(handle_t *handle, /* * OK, we spliced it into the inode itself on a direct block. */ - ext4_mark_inode_dirty(handle, ar->inode); + err = ext4_mark_inode_dirty(handle, ar->inode); + if (unlikely(err)) + goto err_out; jbd_debug(5, "splicing direct\n"); } return err; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f35e289e17aa..c3a1ad2db122 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1260,7 +1260,7 @@ out: int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { - int ret, inline_size, no_expand; + int ret, ret2, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; @@ -1314,7 +1314,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, out: ext4_write_unlock_xattr(dir, &no_expand); - ext4_mark_inode_dirty(handle, dir); + ret2 = ext4_mark_inode_dirty(handle, dir); + if (unlikely(ret2 && !ret)) + ret = ret2; brelse(iloc.bh); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a7087ff533bb..456e8a6b4809 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1302,7 +1302,7 @@ static int ext4_write_end(struct file *file, * filesystems. */ if (i_size_changed || inline_data) - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied @@ -3083,7 +3083,7 @@ static int ext4_da_write_end(struct file *file, * new_i_size is less that inode->i_size * bu greater than i_disksize.(hint delalloc) */ - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); } } @@ -3100,7 +3100,7 @@ static int ext4_da_write_end(struct file *file, if (ret2 < 0) ret = ret2; ret2 = ext4_journal_stop(handle); - if (!ret) + if (unlikely(ret2 && !ret)) ret = ret2; return ret ? ret : copied; @@ -3892,6 +3892,8 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, loff_t len) { handle_t *handle; + int ret; + loff_t size = i_size_read(inode); WARN_ON(!inode_is_locked(inode)); @@ -3905,10 +3907,10 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, if (IS_ERR(handle)) return PTR_ERR(handle); ext4_update_i_disksize(inode, size); - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - return 0; + return ret; } static void ext4_wait_dax_page(struct ext4_inode_info *ei) @@ -3960,7 +3962,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) loff_t first_block_offset, last_block_offset; handle_t *handle; unsigned int credits; - int ret = 0; + int ret = 0, ret2 = 0; trace_ext4_punch_hole(inode, offset, length, 0); @@ -4083,7 +4085,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret2)) + ret = ret2; if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: @@ -4152,7 +4156,7 @@ int ext4_truncate(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); unsigned int credits; - int err = 0; + int err = 0, err2; handle_t *handle; struct address_space *mapping = inode->i_mapping; @@ -4240,7 +4244,9 @@ out_stop: ext4_orphan_del(handle, inode); inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + err2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err2 && !err)) + err = err2; ext4_journal_stop(handle); trace_ext4_truncate_exit(inode); @@ -5298,6 +5304,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) inode->i_gid = attr->ia_gid; error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); + if (unlikely(error)) + return error; } if (attr->ia_valid & ATTR_SIZE) { @@ -5783,7 +5791,8 @@ out_unlock: * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) * we start and wait on commits. */ -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) +int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, + const char *func, unsigned int line) { struct ext4_iloc iloc; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -5793,13 +5802,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) - return err; + goto out; if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, iloc, handle); - return ext4_mark_iloc_dirty(handle, inode, &iloc); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); +out: + if (unlikely(err)) + ext4_error_inode_err(inode, func, line, 0, err, + "mark_inode_dirty error"); + return err; } /* diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index fb6520f37135..c5e3fc998211 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -287,7 +287,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, struct inode *tmp_inode) { - int retval; + int retval, retval2 = 0; __le32 i_data[3]; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); @@ -342,7 +342,9 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * i_blocks when freeing the indirect meta-data blocks */ retval = free_ind_block(handle, inode, i_data); - ext4_mark_inode_dirty(handle, inode); + retval2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(retval2 && !retval)) + retval = retval2; err_out: return retval; @@ -601,7 +603,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; - int ret; + int ret, ret2 = 0; if (!ext4_has_feature_extents(inode->i_sb) || (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) @@ -655,7 +657,9 @@ int ext4_ind_migrate(struct inode *inode) memset(ei->i_data, 0, sizeof(ei->i_data)); for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); - ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret2 && !ret)) + ret = ret2; errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a8aca4772aaa..56738b538ddf 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1993,7 +1993,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, { unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; - int err; + int err, err2; if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); @@ -2028,12 +2028,12 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, dir->i_mtime = dir->i_ctime = current_time(dir); ext4_update_dx_flag(dir); inode_inc_iversion(dir); - ext4_mark_inode_dirty(handle, dir); + err2 = ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); - return 0; + return err ? err : err2; } /* @@ -2223,7 +2223,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; - ext4_mark_inode_dirty(handle, dir); + retval = ext4_mark_inode_dirty(handle, dir); + if (unlikely(retval)) + goto out; } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { @@ -2576,12 +2578,12 @@ static int ext4_add_nondir(handle_t *handle, struct inode *inode = *inodep; int err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); d_instantiate_new(dentry, inode); *inodep = NULL; - return 0; + return err; } drop_nlink(inode); ext4_orphan_add(handle, inode); @@ -2775,7 +2777,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { handle_t *handle; struct inode *inode; - int err, credits, retries = 0; + int err, err2 = 0, credits, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; @@ -2808,7 +2810,9 @@ out_clear_inode: clear_nlink(inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); - ext4_mark_inode_dirty(handle, inode); + err2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err2)) + err = err2; ext4_journal_stop(handle); iput(inode); goto out_retry; @@ -3148,10 +3152,12 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) inode->i_size = 0; ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + retval = ext4_mark_inode_dirty(handle, inode); + if (retval) + goto end_rmdir; ext4_dec_count(handle, dir); ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); + retval = ext4_mark_inode_dirty(handle, dir); #ifdef CONFIG_UNICODE /* VFS negative dentries are incompatible with Encoding and @@ -3221,7 +3227,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) goto end_unlink; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); + retval = ext4_mark_inode_dirty(handle, dir); + if (retval) + goto end_unlink; if (inode->i_nlink == 0) ext4_warning_inode(inode, "Deleting file '%.*s' with no links", dentry->d_name.len, dentry->d_name.name); @@ -3230,7 +3238,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + retval = ext4_mark_inode_dirty(handle, inode); #ifdef CONFIG_UNICODE /* VFS negative dentries are incompatible with Encoding and @@ -3419,7 +3427,7 @@ retry: err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); /* this can happen only for tmpfile being * linked the first time */ @@ -3531,7 +3539,7 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, static int ext4_setent(handle_t *handle, struct ext4_renament *ent, unsigned ino, unsigned file_type) { - int retval; + int retval, retval2; BUFFER_TRACE(ent->bh, "get write access"); retval = ext4_journal_get_write_access(handle, ent->bh); @@ -3543,19 +3551,19 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, inode_inc_iversion(ent->dir); ent->dir->i_ctime = ent->dir->i_mtime = current_time(ent->dir); - ext4_mark_inode_dirty(handle, ent->dir); + retval = ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { - retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); - if (unlikely(retval)) { - ext4_std_error(ent->dir->i_sb, retval); - return retval; + retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); + if (unlikely(retval2)) { + ext4_std_error(ent->dir->i_sb, retval2); + return retval2; } } brelse(ent->bh); ent->bh = NULL; - return 0; + return retval; } static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, @@ -3790,7 +3798,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, EXT4_FT_CHRDEV); if (retval) goto end_rename; - ext4_mark_inode_dirty(handle, whiteout); + retval = ext4_mark_inode_dirty(handle, whiteout); + if (unlikely(retval)) + goto end_rename; } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); @@ -3811,7 +3821,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, * rename. */ old.inode->i_ctime = current_time(old.inode); - ext4_mark_inode_dirty(handle, old.inode); + retval = ext4_mark_inode_dirty(handle, old.inode); + if (unlikely(retval)) + goto end_rename; if (!whiteout) { /* @@ -3840,12 +3852,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } else { ext4_inc_count(handle, new.dir); ext4_update_dx_flag(new.dir); - ext4_mark_inode_dirty(handle, new.dir); + retval = ext4_mark_inode_dirty(handle, new.dir); + if (unlikely(retval)) + goto end_rename; } } - ext4_mark_inode_dirty(handle, old.dir); + retval = ext4_mark_inode_dirty(handle, old.dir); + if (unlikely(retval)) + goto end_rename; if (new.inode) { - ext4_mark_inode_dirty(handle, new.inode); + retval = ext4_mark_inode_dirty(handle, new.inode); + if (unlikely(retval)) + goto end_rename; if (!new.inode->i_nlink) ext4_orphan_add(handle, new.inode); } @@ -3979,8 +3997,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ctime = current_time(old.inode); old.inode->i_ctime = ctime; new.inode->i_ctime = ctime; - ext4_mark_inode_dirty(handle, old.inode); - ext4_mark_inode_dirty(handle, new.inode); + retval = ext4_mark_inode_dirty(handle, old.inode); + if (unlikely(retval)) + goto end_rename; + retval = ext4_mark_inode_dirty(handle, new.inode); + if (unlikely(retval)) + goto end_rename; if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 49821d8a0910..c3983f87587d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5880,7 +5880,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL; inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, S_NOATIME | S_IMMUTABLE); - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); @@ -5982,12 +5982,14 @@ static int ext4_quota_off(struct super_block *sb, int type) * this is not a hard failure and quotas are already disabled. */ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + err = PTR_ERR(handle); goto out_unlock; + } EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); out_unlock: inode_unlock(inode); @@ -6045,7 +6047,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err, offset = off & (sb->s_blocksize - 1); + int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1); int retries = 0; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -6093,9 +6095,11 @@ out: if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; - ext4_mark_inode_dirty(handle, inode); + err2 = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err2 && !err)) + err = err2; } - return len; + return err ? err : len; } #endif diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 01ba66373e97..9b29a40738ac 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1327,7 +1327,7 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, int blocksize = ea_inode->i_sb->s_blocksize; int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; int csize, wsize = 0; - int ret = 0; + int ret = 0, ret2 = 0; int retries = 0; retry: @@ -1385,7 +1385,9 @@ retry: ext4_update_i_disksize(ea_inode, wsize); inode_unlock(ea_inode); - ext4_mark_inode_dirty(handle, ea_inode); + ret2 = ext4_mark_inode_dirty(handle, ea_inode); + if (unlikely(ret2 && !ret)) + ret = ret2; out: brelse(bh); -- cgit v1.2.3 From 73c384c0cdaa8ea9ca9ef2d0cff6a25930f1648e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 May 2020 10:50:28 -0700 Subject: ext4: avoid ext4_error()'s caused by ENOMEM in the truncate path We can't fail in the truncate path without requiring an fsck. Add work around for this by using a combination of retry loops and the __GFP_NOFAIL flag. From: Theodore Ts'o Signed-off-by: Theodore Ts'o Signed-off-by: Anna Pendleton Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20200507175028.15061-1-pendleton@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 884ce3086486..5d901bf92ce9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -630,6 +630,7 @@ enum { */ #define EXT4_EX_NOCACHE 0x40000000 #define EXT4_EX_FORCE_CACHE 0x20000000 +#define EXT4_EX_NOFAIL 0x10000000 /* * Flags used by ext4_free_blocks diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3ca797faa86b..ff7eeb5a77ef 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -297,11 +297,14 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode, { struct ext4_ext_path *path = *ppath; int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); + int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; + + if (nofail) + flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, - EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | - (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); + flags); } static int @@ -487,8 +490,12 @@ __read_extent_tree_block(const char *function, unsigned int line, { struct buffer_head *bh; int err; + gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; - bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); + bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -837,6 +844,10 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, struct ext4_ext_path *path = orig_path ? *orig_path : NULL; short int depth, i, ppos = 0; int ret; + gfp_t gfp_flags = GFP_NOFS; + + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; eh = ext_inode_hdr(inode); depth = ext_depth(inode); @@ -857,7 +868,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, if (!path) { /* account possible depth increase */ path = kcalloc(depth + 2, sizeof(struct ext4_ext_path), - GFP_NOFS); + gfp_flags); if (unlikely(!path)) return ERR_PTR(-ENOMEM); path[0].p_maxdepth = depth + 1; @@ -1007,9 +1018,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, oldblock; __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ + gfp_t gfp_flags = GFP_NOFS; int err = 0; size_t ext_size = 0; + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; + /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1043,7 +1058,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * We need this to handle errors and free blocks * upon them. */ - ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), GFP_NOFS); + ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags); if (!ablocks) return -ENOMEM; @@ -2019,7 +2034,7 @@ prepend: if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %u\n", next); BUG_ON(npath != NULL); - npath = ext4_find_extent(inode, next, NULL, 0); + npath = ext4_find_extent(inode, next, NULL, gb_flags); if (IS_ERR(npath)) return PTR_ERR(npath); BUG_ON(npath->p_depth != path->p_depth); @@ -2792,7 +2807,8 @@ again: ext4_fsblk_t pblk; /* find extent for or closest extent to this block */ - path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); + path = ext4_find_extent(inode, end, NULL, + EXT4_EX_NOCACHE | EXT4_EX_NOFAIL); if (IS_ERR(path)) { ext4_journal_stop(handle); return PTR_ERR(path); @@ -2878,7 +2894,7 @@ again: le16_to_cpu(path[k].p_hdr->eh_entries)+1; } else { path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), - GFP_NOFS); + GFP_NOFS | __GFP_NOFAIL); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; @@ -3303,7 +3319,7 @@ static int ext4_split_extent(handle_t *handle, * Update path is required because previous ext4_split_extent_at() may * result in split of original leaf or extent zeroout. */ - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + path = ext4_find_extent(inode, map->m_lblk, ppath, flags); if (IS_ERR(path)) return PTR_ERR(path); depth = ext_depth(inode); @@ -4365,7 +4381,14 @@ retry: } if (err) return err; - return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); +retry_remove_space: + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + if (err == -ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_remove_space; + } + return err; } static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, -- cgit v1.2.3 From 6db074618969dc6fac4978e8043945fd440b310a Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sun, 10 May 2020 11:54:51 +0530 Subject: ext4: use BIT() macro for BH_** state bits Simply use BIT() macro for all BH_** state bits instead of open coding it. There should be no functionality change in this patch. Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/57667689f51a3f9dba2fcef7d3425187fa3ba69f.1589086800.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 8 ++++---- fs/ext4/inode.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5d901bf92ce9..89cac4e32018 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -171,10 +171,10 @@ struct ext4_allocation_request { * well as to store the information returned by ext4_map_blocks(). It * takes less room on the stack than a struct buffer_head. */ -#define EXT4_MAP_NEW (1 << BH_New) -#define EXT4_MAP_MAPPED (1 << BH_Mapped) -#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) -#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) +#define EXT4_MAP_NEW BIT(BH_New) +#define EXT4_MAP_MAPPED BIT(BH_Mapped) +#define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) +#define EXT4_MAP_BOUNDARY BIT(BH_Boundary) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 456e8a6b4809..043ee7efce5f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2084,7 +2084,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) return err; } -#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) +#define BH_FLAGS (BIT(BH_Unwritten) | BIT(BH_Delay)) /* * mballoc gives us at most this number of blocks... @@ -2364,7 +2364,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) dioread_nolock = ext4_should_dioread_nolock(inode); if (dioread_nolock) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; - if (map->m_flags & (1 << BH_Delay)) + if (map->m_flags & BIT(BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; err = ext4_map_blocks(handle, inode, map, get_blocks_flags); -- cgit v1.2.3 From 70aa1554b01474ab08d08e5a18b0215a7ff1e8dc Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Sun, 10 May 2020 11:54:55 +0530 Subject: ext4: make ext_debug() implementation to use pr_debug() ext_debug() msgs could be helpful, provided those could be enabled without recompiling kernel and also if we could selectively enable only required prints for case by case debugging. So make ext_debug() implementation use pr_debug(). Also change ext_debug() to be defined with CONFIG_EXT4_DEBUG. So EXT_DEBUG macro now mostly remain for below 3 functions. ext4_ext_show_path/leaf/move() (whose print msgs use ext_debug() which again could be dynamically enabled using pr_debug()) This also changes the ext_debug() to take inode as a parameter to add inode no. in all of it's msgs. Prints additional info like process name / pid, superblock id etc. This also removes any explicit function names passed in ext_debug(). Since ext_debug() on it's own prints file, func and line no. Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/d31dc189b0aeda9384fe7665e36da7cd8c61571f.1589086800.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/Kconfig | 2 +- fs/ext4/ext4.h | 18 +++++-- fs/ext4/extents.c | 144 ++++++++++++++++++++++++++---------------------------- fs/ext4/inode.c | 11 ++--- 4 files changed, 87 insertions(+), 88 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 02376ddb0cb5..cf9e430514c4 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -99,7 +99,7 @@ config EXT4_DEBUG Enables run-time debugging support for the ext4 filesystem. If you select Y here, then you will be able to turn on debugging - using dynamic debug control for mb_debug() msgs. + using dynamic debug control for mb_debug() / ext_debug() msgs. config EXT4_KUNIT_TESTS tristate "KUnit tests for ext4" diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 89cac4e32018..80866f124b9a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -80,14 +80,22 @@ #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif + /* + * Turn on EXT_DEBUG to enable ext4_ext_show_path/leaf/move in extents.c + */ +#define EXT_DEBUG__ + /* - * Turn on EXT_DEBUG to get lots of info about extents operations. + * Dynamic printk for controlled extents debugging. */ -#define EXT_DEBUG__ -#ifdef EXT_DEBUG -#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) +#ifdef CONFIG_EXT4_DEBUG +#define ext_debug(ino, fmt, ...) \ + pr_debug("[%s/%d] EXT4-fs (%s): ino %lu: (%s, %d): %s:" fmt, \ + current->comm, task_pid_nr(current), \ + ino->i_sb->s_id, ino->i_ino, __FILE__, __LINE__, \ + __func__, ##__VA_ARGS__) #else -#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) +#define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* data type for block offset of block group */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2f711cc3cdce..969f4c030cf0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -607,22 +607,22 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { int k, l = path->p_depth; - ext_debug("path:"); + ext_debug(inode, "path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->%llu", + ext_debug(inode, " %d->%llu", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); } else if (path->p_ext) { - ext_debug(" %d:[%d]%d:%llu ", + ext_debug(inode, " %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext4_ext_pblock(path->p_ext)); } else - ext_debug(" []"); + ext_debug(inode, " []"); } - ext_debug("\n"); + ext_debug(inode, "\n"); } static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) @@ -638,14 +638,14 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) eh = path[depth].p_hdr; ex = EXT_FIRST_EXTENT(eh); - ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); + ext_debug(inode, "Displaying leaf extents\n"); for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), + ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); } - ext_debug("\n"); + ext_debug(inode, "\n"); } static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, @@ -658,10 +658,9 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent_idx *idx; idx = path[level].p_idx; while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", level, - le32_to_cpu(idx->ei_block), - ext4_idx_pblock(idx), - newblock); + ext_debug(inode, "%d: move %d:%llu in new index %llu\n", + level, le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), newblock); idx++; } @@ -670,7 +669,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), ext4_ext_is_unwritten(ex), @@ -714,7 +713,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_extent_idx *r, *l, *m; - ext_debug("binsearch for %u(idx): ", block); + ext_debug(inode, "binsearch for %u(idx): ", block); l = EXT_FIRST_INDEX(eh) + 1; r = EXT_LAST_INDEX(eh); @@ -724,13 +723,13 @@ ext4_ext_binsearch_idx(struct inode *inode, r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), - m, le32_to_cpu(m->ei_block), - r, le32_to_cpu(r->ei_block)); + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; - ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), + ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH @@ -781,7 +780,7 @@ ext4_ext_binsearch(struct inode *inode, return; } - ext_debug("binsearch for %u: ", block); + ext_debug(inode, "binsearch for %u: ", block); l = EXT_FIRST_EXTENT(eh) + 1; r = EXT_LAST_EXTENT(eh); @@ -792,13 +791,13 @@ ext4_ext_binsearch(struct inode *inode, r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), - m, le32_to_cpu(m->ee_block), - r, le32_to_cpu(r->ee_block)); + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; - ext_debug(" -> %d:%llu:[%d]%d ", + ext_debug(inode, " -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_pblock(path->p_ext), ext4_ext_is_unwritten(path->p_ext), @@ -881,7 +880,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { - ext_debug("depth %d: num %d, max %d\n", + ext_debug(inode, "depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); ext4_ext_binsearch_idx(inode, path + ppos, block); @@ -958,18 +957,20 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ - ext_debug("insert new index %d after: %llu\n", logical, ptr); + ext_debug(inode, "insert new index %d after: %llu\n", + logical, ptr); ix = curp->p_idx + 1; } else { /* insert before */ - ext_debug("insert new index %d before: %llu\n", logical, ptr); + ext_debug(inode, "insert new index %d before: %llu\n", + logical, ptr); ix = curp->p_idx; } len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; BUG_ON(len < 0); if (len > 0) { - ext_debug("insert new index %d: " + ext_debug(inode, "insert new index %d: " "move %d indices from 0x%p to 0x%p\n", logical, len, ix, ix + 1); memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); @@ -1036,12 +1037,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; - ext_debug("leaf will be split." + ext_debug(inode, "leaf will be split." " next leaf starts at %d\n", le32_to_cpu(border)); } else { border = newext->ee_block; - ext_debug("leaf will be added." + ext_debug(inode, "leaf will be added." " next leaf starts at %d\n", le32_to_cpu(border)); } @@ -1063,7 +1064,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, return -ENOMEM; /* allocate all needed blocks */ - ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err, flags); @@ -1149,7 +1150,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, goto cleanup; } if (k) - ext_debug("create %d intermediate indices\n", k); + ext_debug(inode, "create %d intermediate indices\n", k); /* insert new index into current index block */ /* current depth stored in i var */ i = depth - 1; @@ -1176,7 +1177,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); - ext_debug("int.index at %d (block %llu): %u -> %llu\n", + ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); /* move remainder of path[i] to the new index block */ @@ -1190,7 +1191,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } /* start copy indexes */ m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx, EXT_MAX_INDEX(path[i].p_hdr)); ext4_ext_show_move(inode, path, newblock, i); if (m) { @@ -1327,7 +1328,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, EXT_FIRST_INDEX(neh)->ei_block = EXT_FIRST_EXTENT(neh)->ee_block; } - ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", + ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n", le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), ext4_idx_pblock(EXT_FIRST_INDEX(neh))); @@ -1969,7 +1970,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* Try to append newex to the ex */ if (ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append [%d]%d block to %u:[%d]%d" + ext_debug(inode, "append [%d]%d block to %u:[%d]%d" "(from %llu)\n", ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), @@ -1994,7 +1995,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, prepend: /* Try to prepend newex to the ex */ if (ext4_can_extents_be_merged(inode, newext, ex)) { - ext_debug("prepend %u[%d]%d block to %u:[%d]%d" + ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n", le32_to_cpu(newext->ee_block), ext4_ext_is_unwritten(newext), @@ -2032,7 +2033,7 @@ prepend: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { - ext_debug("next leaf block - %u\n", next); + ext_debug(inode, "next leaf block - %u\n", next); BUG_ON(npath != NULL); npath = ext4_find_extent(inode, next, NULL, gb_flags); if (IS_ERR(npath)) @@ -2040,12 +2041,12 @@ prepend: BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isn't full(%d)\n", + ext_debug(inode, "next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; goto has_space; } - ext_debug("next leaf has no free space(%d,%d)\n", + ext_debug(inode, "next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); } @@ -2071,7 +2072,7 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", + ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), @@ -2081,7 +2082,7 @@ has_space: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(nearex->ee_block)) { /* Insert after */ - ext_debug("insert %u:%llu:[%d]%d before: " + ext_debug(inode, "insert %u:%llu:[%d]%d before: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2092,7 +2093,7 @@ has_space: } else { /* Insert before */ BUG_ON(newext->ee_block == nearex->ee_block); - ext_debug("insert %u:%llu:[%d]%d after: " + ext_debug(inode, "insert %u:%llu:[%d]%d after: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2102,7 +2103,7 @@ has_space: } len = EXT_LAST_EXTENT(eh) - nearex + 1; if (len > 0) { - ext_debug("insert %u:%llu:[%d]%d: " + ext_debug(inode, "insert %u:%llu:[%d]%d: " "move %d extents from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2246,7 +2247,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, return; hole_len = min(es.es_lblk - hole_start, hole_len); } - ext_debug(" -> %u:%u\n", hole_start, hole_len); + ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); ext4_es_insert_extent(inode, hole_start, hole_len, ~0, EXTENT_STATUS_HOLE); } @@ -2283,7 +2284,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_dirty(handle, inode, path); if (err) return err; - ext_debug("index is empty, remove it, free block %llu\n", leaf); + ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); trace_ext4_ext_rm_idx(inode, leaf); ext4_free_blocks(handle, inode, NULL, leaf, 1, @@ -2562,7 +2563,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext4_fsblk_t pblk; /* the header must be checked already in ext4_ext_remove_space() */ - ext_debug("truncate since %u in leaf to %u\n", start, end); + ext_debug(inode, "truncate since %u in leaf to %u\n", start, end); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; @@ -2588,7 +2589,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, else unwritten = 0; - ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, + ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block, unwritten, ex_ee_len); path[depth].p_ext = ex; @@ -2596,7 +2597,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, b = ex_ee_block+ex_ee_len - 1 < end ? ex_ee_block+ex_ee_len - 1 : end; - ext_debug(" border %u:%u\n", a, b); + ext_debug(inode, " border %u:%u\n", a, b); /* If this extent is beyond the end of the hole, skip it */ if (end < ex_ee_block) { @@ -2705,7 +2706,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; - ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num, + ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num, ext4_ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); @@ -2782,7 +2783,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, partial.lblk = 0; partial.state = initial; - ext_debug("truncate since %u to %u\n", start, end); + ext_debug(inode, "truncate since %u to %u\n", start, end); /* probably first extent we're gonna free will be last in block */ handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE, @@ -2924,7 +2925,7 @@ again: /* this is index block */ if (!path[i].p_hdr) { - ext_debug("initialize header\n"); + ext_debug(inode, "initialize header\n"); path[i].p_hdr = ext_block_hdr(path[i].p_bh); } @@ -2932,7 +2933,7 @@ again: /* this level hasn't been touched yet */ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; - ext_debug("init index ptr: hdr 0x%p, num %d\n", + ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n", path[i].p_hdr, le16_to_cpu(path[i].p_hdr->eh_entries)); } else { @@ -2940,13 +2941,13 @@ again: path[i].p_idx--; } - ext_debug("level %d - index, first 0x%p, cur 0x%p\n", + ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n", i, EXT_FIRST_INDEX(path[i].p_hdr), path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { struct buffer_head *bh; /* go to the next level */ - ext_debug("move to level %d (block %llu)\n", + ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); bh = read_extent_tree_block(inode, @@ -2982,7 +2983,7 @@ again: brelse(path[i].p_bh); path[i].p_bh = NULL; i--; - ext_debug("return to level %d\n", i); + ext_debug(inode, "return to level %d\n", i); } } @@ -3150,8 +3151,7 @@ static int ext4_split_extent_at(handle_t *handle, BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); - ext_debug("ext4_split_extents_at: inode %lu, logical" - "block %llu\n", inode->i_ino, (unsigned long long)split); + ext_debug(inode, "logical block %llu\n", (unsigned long long)split); ext4_ext_show_leaf(inode, path); @@ -3388,9 +3388,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, int err = 0; int split_flag = EXT4_EXT_DATA_VALID2; - ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" - "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)map->m_lblk, map_len); + ext_debug(inode, "logical block %llu, max_blocks %u\n", + (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) @@ -3642,8 +3641,7 @@ static int ext4_split_convert_extents(handle_t *handle, unsigned int ee_len; int split_flag = 0, depth; - ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", - __func__, inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map->m_len); eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) @@ -3689,8 +3687,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" - "block %llu, max_blocks %u\n", inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); /* If extent is larger than requested it is a clear sign that we still @@ -3760,8 +3757,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - ext_debug("%s: inode %lu, logical" - "block %llu, max_blocks %u\n", __func__, inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); if (ee_block != map->m_lblk || ee_len > map->m_len) { @@ -3817,10 +3813,9 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, int ret = 0; int err = 0; - ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical " - "block %llu, max_blocks %u, flags %x, allocated %u\n", - inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, - flags, allocated); + ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", + (unsigned long long)map->m_lblk, map->m_len, flags, + allocated); ext4_ext_show_leaf(inode, path); /* @@ -4057,8 +4052,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_lblk_t cluster_offset; - ext_debug("blocks %u/%u requested for inode %lu\n", - map->m_lblk, map->m_len, inode->i_ino); + ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len); trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* find extent for this block */ @@ -4105,8 +4099,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (map->m_lblk - ee_block); - ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, - ee_block, ee_len, newblock); + ext_debug(inode, "%u fit into %u:%d -> %llu\n", + map->m_lblk, ee_block, ee_len, newblock); /* * If the extent is initialized check whether the @@ -4246,7 +4240,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out2; allocated_clusters = ar.len; ar.len = EXT4_C2B(sbi, ar.len) - offset; - ext_debug("allocate new block: goal %llu, found %llu/%u, requested %u\n", + ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n", ar.goal, newblock, ar.len, allocated); if (ar.len > allocated) ar.len = allocated; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0a52f98512d7..e7bf9388538b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -501,9 +501,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, #endif map->m_flags = 0; - ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," - "logical block %lu\n", inode->i_ino, flags, map->m_len, - (unsigned long) map->m_lblk); + ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n", + flags, map->m_len, (unsigned long) map->m_lblk); /* * ext4_map_blocks returns an int, and m_len is an unsigned int @@ -734,8 +733,7 @@ out_sem: } if (retval < 0) - ext_debug("failed for inode %lu with err %d\n", - inode->i_ino, retval); + ext_debug(inode, "failed with err %d\n", retval); return retval; } @@ -1691,8 +1689,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, invalid_block = ~0; map->m_flags = 0; - ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," - "logical block %lu\n", inode->i_ino, map->m_len, + ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len, (unsigned long) map->m_lblk); /* Lookup extent status tree firstly */ -- cgit v1.2.3 From de8ff14cab998f51a3a289d2b58d6d440782f77e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 May 2020 14:52:52 -0700 Subject: ext4: add casefold flag to EXT4_INODE_* flags No one currently needs EXT4_INODE_CASEFOLD, but add it to keep the EXT4_INODE_* definitions in sync with the EXT4_*_FL definitions. Also make it clearer that the casefold flag is only for directories. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200510215252.87833-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 80866f124b9a..af60be906528 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -425,7 +425,7 @@ struct flex_groups { /* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ -#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ @@ -498,6 +498,7 @@ enum { /* 22 was formerly EXT4_INODE_EOFBLOCKS */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */ + EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */ EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ }; @@ -543,6 +544,7 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(EA_INODE); CHECK_FLAG_VALUE(INLINE_DATA); CHECK_FLAG_VALUE(PROJINHERIT); + CHECK_FLAG_VALUE(CASEFOLD); CHECK_FLAG_VALUE(RESERVED); } -- cgit v1.2.3 From 993778306e7901a7286322f25c7c681dd47bede6 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 20 May 2020 12:10:36 +0530 Subject: ext4: mballoc: use lock for checking free blocks while retrying Currently while doing block allocation grp->bb_free may be getting modified if discard is happening in parallel. For e.g. consider a case where there are lot of threads who have preallocated lot of blocks and there is a thread which is trying to discard all of this group's PA. Now it could happen that we see all of those group's bb_free is zero and fail the allocation while there is sufficient space if we free up all the PA. So this patch adds another flag "EXT4_MB_STRICT_CHECK" which will be set if we are unable to allocate any blocks in the first try (since we may not have considered blocks about to be discarded from PA lists). So during retry attempt to allocate blocks we will use ext4_lock_group() for checking if the group is good or not. Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/9cb740a117c958c36596f167b12af1beae9a68b7.1589955723.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 13 ++++++++++++- include/trace/events/ext4.h | 3 ++- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af60be906528..dbc36e377eb0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -150,6 +150,8 @@ enum SHIFT_DIRECTION { #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 /* Use blocks from reserved pool */ #define EXT4_MB_USE_RESERVED 0x2000 +/* Do strict check for free blocks while retrying block allocation */ +#define EXT4_MB_STRICT_CHECK 0x4000 struct ext4_allocation_request { /* target inode for block we're allocating */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c9297c878a90..a9083113a8c0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2176,9 +2176,13 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, ext4_group_t group, int cr) { struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct super_block *sb = ac->ac_sb; + bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; ext4_grpblk_t free; int ret = 0; + if (should_lock) + ext4_lock_group(sb, group); free = grp->bb_free; if (free == 0) goto out; @@ -2186,6 +2190,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, goto out; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) goto out; + if (should_lock) + ext4_unlock_group(sb, group); /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { @@ -2194,8 +2200,12 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, return ret; } + if (should_lock) + ext4_lock_group(sb, group); ret = ext4_mb_good_group(ac, group, cr); out: + if (should_lock) + ext4_unlock_group(sb, group); return ret; } @@ -4610,7 +4620,8 @@ static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, goto out_dbg; } seq_retry = ext4_get_discard_pa_seq_sum(); - if (seq_retry != *seq) { + if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) { + ac->ac_flags |= EXT4_MB_STRICT_CHECK; *seq = seq_retry; ret = true; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 280475c1cecc..cc41d692ae8e 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -35,7 +35,8 @@ struct partial_cluster; { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ - { EXT4_MB_USE_RESERVED, "USE_RESV" }) + { EXT4_MB_USE_RESERVED, "USE_RESV" }, \ + { EXT4_MB_STRICT_CHECK, "STRICT_CHECK" }) #define show_map_flags(flags) __print_flags(flags, "|", \ { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ -- cgit v1.2.3 From 9f364e1d9537d44fdf58deeeddb51277d8327ce5 Mon Sep 17 00:00:00 2001 From: Jonathan Grant Date: Fri, 22 May 2020 16:07:58 +0100 Subject: add comment for ext4_dir_entry_2 file_type member Signed-off-by: Jonathan Grant Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/ad3290d5-86af-99c1-f9d5-cd1bab710429@jguk.org Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index dbc36e377eb0..a94e7aaea6e6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2061,7 +2061,7 @@ struct ext4_dir_entry_2 { __le32 inode; /* Inode number */ __le16 rec_len; /* Directory entry length */ __u8 name_len; /* Name length */ - __u8 file_type; + __u8 file_type; /* See file type macros EXT4_FT_* below */ char name[EXT4_NAME_LEN]; /* File name */ }; -- cgit v1.2.3 From 175efa81feb8405676e0136d97b10380179c92e0 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Tue, 5 May 2020 17:43:14 +0200 Subject: ext4: fix EXT4_MAX_LOGICAL_BLOCK macro ext4 supports max number of logical blocks in a file to be 0xffffffff. (This is since ext4_extent's ee_block is __le32). This means that EXT4_MAX_LOGICAL_BLOCK should be 0xfffffffe (starting from 0 logical offset). This patch fixes this. The issue was seen when ext4 moved to iomap_fiemap API and when overlayfs was mounted on top of ext4. Since overlayfs was missing filemap_check_ranges(), so it could pass a arbitrary huge length which lead to overflow of map.m_len logic. This patch fixes that. Fixes: d3b6f23f7167 ("ext4: move ext4_fiemap to use iomap framework") Reported-by: syzbot+77fa5bdb65cc39711820@syzkaller.appspotmail.com Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200505154324.3226743-2-hch@lst.de Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a94e7aaea6e6..1eb07ca91fca 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -733,7 +733,7 @@ enum { #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF /* Max logical block we can support */ -#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFF +#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFE /* * Structure of an inode on the disk -- cgit v1.2.3 From 10c5db286452b8c60e8f58e9a4c1cbc5a91e4e5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:11 +0200 Subject: fs: move the fiemap definitions out of fs.h No need to pull the fiemap definitions into almost every file in the kernel build. Signed-off-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-5-hch@lst.de Signed-off-by: Theodore Ts'o --- fs/bad_inode.c | 1 + fs/btrfs/extent_io.h | 1 + fs/cifs/inode.c | 1 + fs/cifs/smb2ops.c | 1 + fs/ext2/inode.c | 1 + fs/ext4/ext4.h | 1 + fs/f2fs/data.c | 1 + fs/f2fs/inline.c | 1 + fs/gfs2/inode.c | 1 + fs/hpfs/file.c | 1 + fs/ioctl.c | 1 + fs/iomap/fiemap.c | 1 + fs/nilfs2/inode.c | 1 + fs/overlayfs/inode.c | 1 + fs/xfs/xfs_iops.c | 1 + include/linux/fiemap.h | 24 ++++++++++++++++++++++++ include/linux/fs.h | 19 +------------------ include/uapi/linux/fiemap.h | 6 +++--- 18 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 include/linux/fiemap.h (limited to 'fs/ext4/ext4.h') diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 8035d2a44561..54f0ce444272 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -15,6 +15,7 @@ #include #include #include +#include static int bad_file_open(struct inode *inode, struct file *filp) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2ed65bd0760e..817698bc0669 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -5,6 +5,7 @@ #include #include +#include #include "ulist.h" /* diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 390d2b15ef6e..3f276eb8ca68 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "cifsfs.h" diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f829f4165d38..09047f1ddfb6 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifsglob.h" #include "smb2pdu.h" diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c885cf7d724b..0f12a0e8a8d9 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xattr.h" diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1eb07ca91fca..9e5c332a2b94 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include #endif diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdf2f626bea7..25abbbb65ba0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4167e5408151..9686ffea177e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -8,6 +8,7 @@ #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 70b2d3a1e866..4842f313a808 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "gfs2.h" diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index b36abf9cb345..62959a8e43ad 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -9,6 +9,7 @@ #include "hpfs_fn.h" #include +#include #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/ioctl.c b/fs/ioctl.c index 8fe5131b1dee..3f300cc07dee 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index d55e8f491a5e..0a807bbb2b4a 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -6,6 +6,7 @@ #include #include #include +#include struct fiemap_ctx { struct fiemap_extent_info *fi; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 671085512e0f..6e1aca38931f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "nilfs.h" #include "btnode.h" #include "segment.h" diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b0d42ece4d7c..b5fec3410556 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "overlayfs.h" diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f7a99b3bbcf7..44c353998ac5 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * Directories have different lock order w.r.t. mmap_sem compared to regular diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 000000000000..240d4f7d9116 --- /dev/null +++ b/include/linux/fiemap.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FIEMAP_H +#define _LINUX_FIEMAP_H 1 + +#include +#include + +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent __user *fi_extents_start; /* Start of + fiemap_extent array */ +}; + +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, u64 len, + get_block_t *get_block); + +#endif /* _LINUX_FIEMAP_H 1 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3104c6f7527b..09bcd329c062 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; struct export_operations; +struct fiemap_extent_info; struct hd_geometry; struct iovec; struct kiocb; @@ -1745,19 +1745,6 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); -/* - * VFS FS_IOC_FIEMAP helper definitions. - */ -struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ -}; -int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, - u64 phys, u64 len, u32 flags); -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); /* * This is the "filldir" function type, used by readdir() to let @@ -3299,10 +3286,6 @@ static inline int vfs_fstat(int fd, struct kstat *stat) extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); -extern int generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block); - extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 7a900b2377b6..24ca0c00cae3 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -9,8 +9,8 @@ * Andreas Dilger */ -#ifndef _LINUX_FIEMAP_H -#define _LINUX_FIEMAP_H +#ifndef _UAPI_LINUX_FIEMAP_H +#define _UAPI_LINUX_FIEMAP_H #include @@ -67,4 +67,4 @@ struct fiemap { #define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other * files. */ -#endif /* _LINUX_FIEMAP_H */ +#endif /* _UAPI_LINUX_FIEMAP_H */ -- cgit v1.2.3