From 9e52484c713321e84e8834803a44ca0a001376d2 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 15 Apr 2020 16:31:39 -0400 Subject: ext4: remove EXT4_GET_BLOCKS_KEEP_SIZE flag The eofblocks code was removed in the 5.7 release by "ext4: remove EOFBLOCKS_FL and associated code" (4337ecd1fe99). The ext4_map_blocks() flag used to trigger it can now be removed as well. Signed-off-by: Eric Whitney Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200415203140.30349-2-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19c87661eeec..40ff8a2fc763 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -45,7 +45,6 @@ struct partial_cluster; { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ - { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ { EXT4_GET_BLOCKS_ZERO, "ZERO" }) /* -- cgit v1.2.3 From 493e83aafa02316bc79ec90041c378d7902194fa Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 15 Apr 2020 16:31:40 -0400 Subject: ext4: translate a few more map flags to strings in tracepoints As new ext4_map_blocks() flags have been added, not all have gotten flag bit to string translations to make tracepoint output more readable. Fix that, and go one step further by adding a translation for the EXT4_EX_NOCACHE flag as well. The EXT4_EX_FORCE_CACHE flag can never be set in a tracepoint in the current code, so there's no need to bother with a translation for it right now. Signed-off-by: Eric Whitney Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200415203140.30349-3-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 40ff8a2fc763..280475c1cecc 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -45,7 +45,10 @@ struct partial_cluster; { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ - { EXT4_GET_BLOCKS_ZERO, "ZERO" }) + { EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, "CONVERT_UNWRITTEN" }, \ + { EXT4_GET_BLOCKS_ZERO, "ZERO" }, \ + { EXT4_GET_BLOCKS_IO_SUBMIT, "IO_SUBMIT" }, \ + { EXT4_EX_NOCACHE, "EX_NOCACHE" }) /* * __print_flags() requires that all enum values be wrapped in the -- cgit v1.2.3 From 4301efa4c7cca11556dd89899eee066d49b47bf7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Apr 2020 10:54:44 +0200 Subject: writeback: Export inode_io_list_del() Ext4 needs to remove inode from writeback lists after it is out of visibility of its journalling machinery (which can still dirty the inode). Export inode_io_list_del() for it. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200421085445.5731-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/fs-writeback.c | 1 + fs/internal.h | 2 -- include/linux/writeback.h | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 76ac9c7d32ec..e58bd5f758d0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1126,6 +1126,7 @@ void inode_io_list_del(struct inode *inode) inode_io_list_del_locked(inode, wb); spin_unlock(&wb->list_lock); } +EXPORT_SYMBOL(inode_io_list_del); /* * mark an inode as under writeback on the sb diff --git a/fs/internal.h b/fs/internal.h index aa5d45524e87..8819d0d58b03 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -143,8 +143,6 @@ extern int dentry_needs_remove_privs(struct dentry *dentry); /* * fs-writeback.c */ -extern void inode_io_list_del(struct inode *inode); - extern long get_nr_dirty_inodes(void); extern int invalidate_inodes(struct super_block *, bool); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a19d845dd7eb..902aa317621b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -197,6 +197,7 @@ void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); +void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) -- cgit v1.2.3 From 993778306e7901a7286322f25c7c681dd47bede6 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 20 May 2020 12:10:36 +0530 Subject: ext4: mballoc: use lock for checking free blocks while retrying Currently while doing block allocation grp->bb_free may be getting modified if discard is happening in parallel. For e.g. consider a case where there are lot of threads who have preallocated lot of blocks and there is a thread which is trying to discard all of this group's PA. Now it could happen that we see all of those group's bb_free is zero and fail the allocation while there is sufficient space if we free up all the PA. So this patch adds another flag "EXT4_MB_STRICT_CHECK" which will be set if we are unable to allocate any blocks in the first try (since we may not have considered blocks about to be discarded from PA lists). So during retry attempt to allocate blocks we will use ext4_lock_group() for checking if the group is good or not. Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/9cb740a117c958c36596f167b12af1beae9a68b7.1589955723.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 13 ++++++++++++- include/trace/events/ext4.h | 3 ++- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af60be906528..dbc36e377eb0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -150,6 +150,8 @@ enum SHIFT_DIRECTION { #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 /* Use blocks from reserved pool */ #define EXT4_MB_USE_RESERVED 0x2000 +/* Do strict check for free blocks while retrying block allocation */ +#define EXT4_MB_STRICT_CHECK 0x4000 struct ext4_allocation_request { /* target inode for block we're allocating */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c9297c878a90..a9083113a8c0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2176,9 +2176,13 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, ext4_group_t group, int cr) { struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct super_block *sb = ac->ac_sb; + bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; ext4_grpblk_t free; int ret = 0; + if (should_lock) + ext4_lock_group(sb, group); free = grp->bb_free; if (free == 0) goto out; @@ -2186,6 +2190,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, goto out; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) goto out; + if (should_lock) + ext4_unlock_group(sb, group); /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { @@ -2194,8 +2200,12 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, return ret; } + if (should_lock) + ext4_lock_group(sb, group); ret = ext4_mb_good_group(ac, group, cr); out: + if (should_lock) + ext4_unlock_group(sb, group); return ret; } @@ -4610,7 +4620,8 @@ static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, goto out_dbg; } seq_retry = ext4_get_discard_pa_seq_sum(); - if (seq_retry != *seq) { + if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) { + ac->ac_flags |= EXT4_MB_STRICT_CHECK; *seq = seq_retry; ret = true; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 280475c1cecc..cc41d692ae8e 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -35,7 +35,8 @@ struct partial_cluster; { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ - { EXT4_MB_USE_RESERVED, "USE_RESV" }) + { EXT4_MB_USE_RESERVED, "USE_RESV" }, \ + { EXT4_MB_STRICT_CHECK, "STRICT_CHECK" }) #define show_map_flags(flags) __print_flags(flags, "|", \ { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ -- cgit v1.2.3 From 44ebcd06bbb3ab3ee446b933800aca32fc4ca9b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:10 +0200 Subject: fs: mark __generic_block_fiemap static There is no caller left outside of ioctl.c. Signed-off-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-4-hch@lst.de Signed-off-by: Theodore Ts'o --- fs/ioctl.c | 4 +--- include/linux/fs.h | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ioctl.c b/fs/ioctl.c index 5e80b40bc1b5..8fe5131b1dee 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -307,8 +307,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) * If you use this function directly, you need to do your own locking. Use * generic_block_fiemap if you want the locking done for you. */ - -int __generic_block_fiemap(struct inode *inode, +static int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, get_block_t *get_block) { @@ -453,7 +452,6 @@ int __generic_block_fiemap(struct inode *inode, return ret; } -EXPORT_SYMBOL(__generic_block_fiemap); /** * generic_block_fiemap - FIEMAP for block based inodes diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..3104c6f7527b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3299,10 +3299,6 @@ static inline int vfs_fstat(int fd, struct kstat *stat) extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); -extern int __generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - loff_t start, loff_t len, - get_block_t *get_block); extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block); -- cgit v1.2.3 From 10c5db286452b8c60e8f58e9a4c1cbc5a91e4e5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:11 +0200 Subject: fs: move the fiemap definitions out of fs.h No need to pull the fiemap definitions into almost every file in the kernel build. Signed-off-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-5-hch@lst.de Signed-off-by: Theodore Ts'o --- fs/bad_inode.c | 1 + fs/btrfs/extent_io.h | 1 + fs/cifs/inode.c | 1 + fs/cifs/smb2ops.c | 1 + fs/ext2/inode.c | 1 + fs/ext4/ext4.h | 1 + fs/f2fs/data.c | 1 + fs/f2fs/inline.c | 1 + fs/gfs2/inode.c | 1 + fs/hpfs/file.c | 1 + fs/ioctl.c | 1 + fs/iomap/fiemap.c | 1 + fs/nilfs2/inode.c | 1 + fs/overlayfs/inode.c | 1 + fs/xfs/xfs_iops.c | 1 + include/linux/fiemap.h | 24 ++++++++++++++++++++++++ include/linux/fs.h | 19 +------------------ include/uapi/linux/fiemap.h | 6 +++--- 18 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 include/linux/fiemap.h (limited to 'include') diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 8035d2a44561..54f0ce444272 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -15,6 +15,7 @@ #include #include #include +#include static int bad_file_open(struct inode *inode, struct file *filp) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2ed65bd0760e..817698bc0669 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -5,6 +5,7 @@ #include #include +#include #include "ulist.h" /* diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 390d2b15ef6e..3f276eb8ca68 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "cifsfs.h" diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f829f4165d38..09047f1ddfb6 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifsglob.h" #include "smb2pdu.h" diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c885cf7d724b..0f12a0e8a8d9 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xattr.h" diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1eb07ca91fca..9e5c332a2b94 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include #endif diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdf2f626bea7..25abbbb65ba0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4167e5408151..9686ffea177e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -8,6 +8,7 @@ #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 70b2d3a1e866..4842f313a808 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "gfs2.h" diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index b36abf9cb345..62959a8e43ad 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -9,6 +9,7 @@ #include "hpfs_fn.h" #include +#include #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/ioctl.c b/fs/ioctl.c index 8fe5131b1dee..3f300cc07dee 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index d55e8f491a5e..0a807bbb2b4a 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -6,6 +6,7 @@ #include #include #include +#include struct fiemap_ctx { struct fiemap_extent_info *fi; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 671085512e0f..6e1aca38931f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "nilfs.h" #include "btnode.h" #include "segment.h" diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b0d42ece4d7c..b5fec3410556 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "overlayfs.h" diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f7a99b3bbcf7..44c353998ac5 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * Directories have different lock order w.r.t. mmap_sem compared to regular diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 000000000000..240d4f7d9116 --- /dev/null +++ b/include/linux/fiemap.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FIEMAP_H +#define _LINUX_FIEMAP_H 1 + +#include +#include + +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent __user *fi_extents_start; /* Start of + fiemap_extent array */ +}; + +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, u64 len, + get_block_t *get_block); + +#endif /* _LINUX_FIEMAP_H 1 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3104c6f7527b..09bcd329c062 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; struct export_operations; +struct fiemap_extent_info; struct hd_geometry; struct iovec; struct kiocb; @@ -1745,19 +1745,6 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); -/* - * VFS FS_IOC_FIEMAP helper definitions. - */ -struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ -}; -int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, - u64 phys, u64 len, u32 flags); -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); /* * This is the "filldir" function type, used by readdir() to let @@ -3299,10 +3286,6 @@ static inline int vfs_fstat(int fd, struct kstat *stat) extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); -extern int generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block); - extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 7a900b2377b6..24ca0c00cae3 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -9,8 +9,8 @@ * Andreas Dilger */ -#ifndef _LINUX_FIEMAP_H -#define _LINUX_FIEMAP_H +#ifndef _UAPI_LINUX_FIEMAP_H +#define _UAPI_LINUX_FIEMAP_H #include @@ -67,4 +67,4 @@ struct fiemap { #define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other * files. */ -#endif /* _LINUX_FIEMAP_H */ +#endif /* _UAPI_LINUX_FIEMAP_H */ -- cgit v1.2.3 From 2732881894714f545ffac42dad7ba7730069874d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:12 +0200 Subject: iomap: fix the iomap_fiemap prototype iomap_fiemap should take u64 start and len arguments, just like the ->fiemap prototype. Signed-off-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-6-hch@lst.de Signed-off-by: Theodore Ts'o --- fs/iomap/fiemap.c | 2 +- include/linux/iomap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index 0a807bbb2b4a..449705575acf 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -66,7 +66,7 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, } int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, - loff_t start, loff_t len, const struct iomap_ops *ops) + u64 start, u64 len, const struct iomap_ops *ops) { struct fiemap_ctx ctx; loff_t ret; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 8b09463dae0d..63db02528b70 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -178,7 +178,7 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - loff_t start, loff_t len, const struct iomap_ops *ops); + u64 start, u64 len, const struct iomap_ops *ops); loff_t iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops); loff_t iomap_seek_data(struct inode *inode, loff_t offset, -- cgit v1.2.3 From cddf8a2c4a8286ae60fc866eab59c8bc524e93a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 May 2020 09:30:13 +0200 Subject: fs: move fiemap range validation into the file systems instances Replace fiemap_check_flags with a fiemap_prep helper that also takes the inode and mapped range, and performs the sanity check and truncation previously done in fiemap_check_range. This way the validation is inside the file system itself and thus properly works for the stacked overlayfs case as well. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200523073016.2944131-7-hch@lst.de Signed-off-by: Theodore Ts'o --- Documentation/filesystems/fiemap.txt | 12 ++++--- fs/btrfs/inode.c | 2 +- fs/cifs/smb2ops.c | 6 ++-- fs/ext4/extents.c | 5 +-- fs/f2fs/data.c | 3 +- fs/ioctl.c | 63 ++++++++++++++---------------------- fs/iomap/fiemap.c | 2 +- fs/nilfs2/inode.c | 2 +- fs/ocfs2/extent_map.c | 3 +- include/linux/fiemap.h | 3 +- 10 files changed, 47 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt index ac87e6fda842..35c8571eccb6 100644 --- a/Documentation/filesystems/fiemap.txt +++ b/Documentation/filesystems/fiemap.txt @@ -203,16 +203,18 @@ EINTR once fatal signal received. Flag checking should be done at the beginning of the ->fiemap callback via the -fiemap_check_flags() helper: +fiemap_prep() helper: -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); +int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 *len, u32 supported_flags); The struct fieinfo should be passed in as received from ioctl_fiemap(). The set of fiemap flags which the fs understands should be passed via fs_flags. If -fiemap_check_flags finds invalid user flags, it will place the bad values in +fiemap_prep finds invalid user flags, it will place the bad values in fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from -fiemap_check_flags(), it should immediately exit, returning that error back to -ioctl_fiemap(). +fiemap_prep(), it should immediately exit, returning that error back to +ioctl_fiemap(). Additionally the range is validate against the supported +maximum file size. For each extent in the request range, the file system should call diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 320d1062068d..1f1ec361089b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8250,7 +8250,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, { int ret; - ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS); + ret = fiemap_prep(inode, fieinfo, start, &len, BTRFS_FIEMAP_FLAGS); if (ret) return ret; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 09047f1ddfb6..828e53e795c6 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3408,8 +3408,10 @@ static int smb3_fiemap(struct cifs_tcon *tcon, int i, num, rc, flags, last_blob; u64 next; - if (fiemap_check_flags(fei, FIEMAP_FLAG_SYNC)) - return -EBADR; + rc = fiemap_prep(d_inode(cfile->dentry), fei, start, &len, + FIEMAP_FLAG_SYNC); + if (rc) + return rc; xid = get_xid(); again: diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b365c8b407c4..cea083efb650 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4938,8 +4938,9 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } - if (fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)) - return -EBADR; + error = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC); + if (error) + return error; error = ext4_fiemap_check_ranges(inode, start, &len); if (error) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 25abbbb65ba0..03faafc591b1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1825,7 +1825,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); + ret = fiemap_prep(inode, fieinfo, start, &len, + FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); if (ret) return ret; diff --git a/fs/ioctl.c b/fs/ioctl.c index 3f300cc07dee..56bbf02209ae 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -149,61 +149,50 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, EXPORT_SYMBOL(fiemap_fill_next_extent); /** - * fiemap_check_flags - check validity of requested flags for fiemap + * fiemap_prep - check validity of requested flags for fiemap + * @inode: Inode to operate on * @fieinfo: Fiemap context passed into ->fiemap - * @fs_flags: Set of fiemap flags that the file system understands + * @start: Start of the mapped range + * @len: Length of the mapped range, can be truncated by this function. + * @supported_flags: Set of fiemap flags that the file system understands * - * Called from file system ->fiemap callback. This will compute the - * intersection of valid fiemap flags and those that the fs supports. That - * value is then compared against the user supplied flags. In case of bad user - * flags, the invalid values will be written into the fieinfo structure, and - * -EBADR is returned, which tells ioctl_fiemap() to return those values to - * userspace. For this reason, a return code of -EBADR should be preserved. + * This function must be called from each ->fiemap instance to validate the + * fiemap request against the file system parameters. * - * Returns 0 on success, -EBADR on bad flags. + * Returns 0 on success, or a negative error on failure. */ -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) +int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 *len, u32 supported_flags) { + u64 maxbytes = inode->i_sb->s_maxbytes; u32 incompat_flags; - incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); - if (incompat_flags) { - fieinfo->fi_flags = incompat_flags; - return -EBADR; - } - return 0; -} -EXPORT_SYMBOL(fiemap_check_flags); - -static int fiemap_check_ranges(struct super_block *sb, - u64 start, u64 len, u64 *new_len) -{ - u64 maxbytes = (u64) sb->s_maxbytes; - - *new_len = len; - - if (len == 0) + if (*len == 0) return -EINVAL; - if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ - if (len > maxbytes || (maxbytes - len) < start) - *new_len = maxbytes - start; + if (*len > maxbytes || (maxbytes - *len) < start) + *len = maxbytes - start; + supported_flags &= FIEMAP_FLAGS_COMPAT; + incompat_flags = fieinfo->fi_flags & ~supported_flags; + if (incompat_flags) { + fieinfo->fi_flags = incompat_flags; + return -EBADR; + } return 0; } +EXPORT_SYMBOL(fiemap_prep); static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) { struct fiemap fiemap; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); - struct super_block *sb = inode->i_sb; - u64 len; int error; if (!inode->i_op->fiemap) @@ -215,11 +204,6 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; - error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, - &len); - if (error) - return error; - fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; @@ -232,7 +216,8 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) filemap_write_and_wait(inode->i_mapping); - error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); + error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, + fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) @@ -320,7 +305,7 @@ static int __generic_block_fiemap(struct inode *inode, bool past_eof = false, whole_file = false; int ret = 0; - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC); if (ret) return ret; diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index 449705575acf..89dca4a97e4a 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -75,7 +75,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, ctx.fi = fi; ctx.prev.type = IOMAP_HOLE; - ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC); + ret = fiemap_prep(inode, fi, start, &len, FIEMAP_FLAG_SYNC); if (ret) return ret; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6e1aca38931f..052c2da11e4d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1006,7 +1006,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, unsigned int blkbits = inode->i_blkbits; int ret, n; - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC); if (ret) return ret; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index e3e2d1b2af51..3744179b73fa 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -746,7 +746,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct buffer_head *di_bh = NULL; struct ocfs2_extent_rec rec; - ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); + ret = fiemap_prep(inode, fieinfo, map_start, &map_len, + OCFS2_FIEMAP_FLAGS); if (ret) return ret; diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 240d4f7d9116..4e624c466583 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -13,9 +13,10 @@ struct fiemap_extent_info { fiemap_extent array */ }; +int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 *len, u32 supported_flags); int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, u64 phys, u64 len, u32 flags); -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, -- cgit v1.2.3