diff options
author | Jan Kara <jack@suse.cz> | 2013-08-17 10:09:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-08-17 10:09:31 -0400 |
commit | 90e775b71ac4e685898c7995756fe58c135adaa6 (patch) | |
tree | e7e84b7990ad61717808830726b0fce4e9c0464a /fs/ext4/ext4.h | |
parent | 5208386c501276df18fee464e21d3c58d2d79517 (diff) | |
download | lwn-90e775b71ac4e685898c7995756fe58c135adaa6.tar.gz lwn-90e775b71ac4e685898c7995756fe58c135adaa6.zip |
ext4: fix lost truncate due to race with writeback
The following race can lead to a loss of i_disksize update from truncate
thus resulting in a wrong inode size if the inode size isn't updated
again before inode is reclaimed:
ext4_setattr()                                  mpage_map_and_submit_extent()
  EXT4_I(inode)->i_disksize = attr->ia_size;
  ...                                             ...
                                                  disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
                                                  /* False because i_size isn't
                                                   * updated yet */
                                                  if (disksize > i_size_read(inode))
                                                  /* True, because i_disksize is
                                                   * already truncated */
                                                  if (disksize > EXT4_I(inode)->i_disksize)
                                                    /* Overwrite i_disksize
                                                     * update from truncate */
                                                    ext4_update_i_disksize()
  i_size_write(inode, attr->ia_size);
For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.
We fix the race by doing both i_disksize and i_size update in truncate
atomically under i_data_sem and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
Diffstat (limited to 'fs/ext4/ext4.h')
-rw-r--r-- | fs/ext4/ext4.h | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 58dede76f75f..3dbc56eb4849 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2432,16 +2432,32 @@ do { \
 #define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
+/* Update i_disksize. Requires i_mutex to avoid races with truncate */
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
-	/*
-	 * XXX: replace with spinlock if seen contended -bzzz
-	 */
+	WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
+		     !mutex_is_locked(&inode->i_mutex));
+	down_write(&EXT4_I(inode)->i_data_sem);
+	if (newsize > EXT4_I(inode)->i_disksize)
+		EXT4_I(inode)->i_disksize = newsize;
+	up_write(&EXT4_I(inode)->i_data_sem);
+}
+
+/*
+ * Update i_disksize after writeback has been started. Races with truncate
+ * are avoided by checking i_size under i_data_sem.
+ */
+static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
+{
+	loff_t i_size;
+
 	down_write(&EXT4_I(inode)->i_data_sem);
+	i_size = i_size_read(inode);
+	if (newsize > i_size)
+		newsize = i_size;
 	if (newsize > EXT4_I(inode)->i_disksize)
 		EXT4_I(inode)->i_disksize = newsize;
 	up_write(&EXT4_I(inode)->i_data_sem);
-	return ;
 }
 
 struct ext4_group_info {