From 2cf66b9de406dadbe7598618aa4541261d7bf536 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 11 Jul 2024 09:46:32 +0800 Subject: f2fs: clean up data_blkaddr() and get_dnode_addr() Introudce a new help get_dnode_base() to wrap common code from get_dnode_addr() and data_blkaddr() for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ac19c61f0c3e..ee030a0b093f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2900,26 +2900,27 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) +static inline unsigned int get_dnode_base(struct inode *inode, + struct page *node_page) { - struct f2fs_node *raw_node; - __le32 *addr_array; - int base = 0; - bool is_inode = IS_INODE(node_page); + if (!IS_INODE(node_page)) + return 0; - raw_node = F2FS_NODE(node_page); + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_page)->i); +} - if (is_inode) { - if (!inode) - /* from GC path only */ - base = offset_in_addr(&raw_node->i); - else if (f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + return blkaddr_in_node(F2FS_NODE(node_page)) + + get_dnode_base(inode, node_page); +} - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[base + offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct page *node_page, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) @@ -3292,8 +3293,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { __le32 *addr = get_dnode_addr(inode, page); @@ -3432,17 +3431,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page) -{ - int base = 0; - - if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - - return blkaddr_in_node(F2FS_NODE(node_page)) + base; -} - #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) -- cgit v1.2.3 From 47f268f33dff4a5e31541a990dc09f116f80e61c Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:05:44 -0700 Subject: f2fs: prevent possible int overflow in dir_block_index() The result of multiplication between values derived from functions dir_buckets() and bucket_blocks() *could* technically reach 2^30 * 2^2 = 2^32. While unlikely to happen, it is prudent to ensure that it will not lead to integer overflow. Thus, use mul_u32_u32() as it's more appropriate to mitigate the issue. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 3843154598a0 ("f2fs: introduce large directory support") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cbd7a5e96a37..14900ca8a9ff 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -166,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } -- cgit v1.2.3 From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:28:38 -0700 Subject: f2fs: fix several potential integer overflows in file offsets When dealing with large extents and calculating file offsets by summing up according extent offsets and lengths of unsigned int type, one may encounter possible integer overflow if the values are big enough. Prevent this from happening by expanding one of the addends to (pgoff_t) type. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: d323d005ac4a ("f2fs: support file defragment") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/file.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index fd1fc06359ee..62ac440d9416 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; @@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 168f08507004..c598cfe5e0ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } -- cgit v1.2.3 From 50438dbc483ca6a133d2bce9d5d6747bcee38371 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jul 2024 10:51:58 -0700 Subject: f2fs: avoid potential int overflow in sanity_check_area_boundary() While calculating the end addresses of main area and segment 0, u32 may be not enough to hold the result without the danger of int overflow. Just in case, play it safe and cast one of the operands to a wider type (u64). Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: fd694733d523 ("f2fs: cover large section in sanity check of super") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3959fd137cc9..4d8f38ca6fcd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3356,9 +3356,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", -- cgit v1.2.3 From 8444ce524947daf441546b5b3a0c418706dade35 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 15 Jul 2024 20:34:51 +0800 Subject: f2fs: add write priority option based on zone UFS Currently, we are using a mix of traditional UFS and zone UFS to support some functionalities that cannot be achieved on zone UFS alone. However, there are some issues with this approach. There exists a significant performance difference between traditional UFS and zone UFS. Under normal usage, we prioritize writes to zone UFS. However, in critical conditions (such as when the entire UFS is almost full), we cannot determine whether data will be written to traditional UFS or zone UFS. This can lead to significant performance fluctuations, which is not conducive to development and testing. To address this, we have added an option zlu_io_enable under sys with the following three modes: 1) zlu_io_enable == 0:Normal mode, prioritize writing to zone UFS; 2) zlu_io_enable == 1:Zone UFS only mode, only allow writing to zone UFS; 3) zlu_io_enable == 2:Traditional UFS priority mode, prioritize writing to traditional UFS. Signed-off-by: Liao Yuanhong Signed-off-by: Wu Bo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 14 ++++++++++++++ fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/segment.c | 25 ++++++++++++++++++++++++- fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 11 +++++++++++ 5 files changed, 58 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index cad6c3dc1f9c..3500920ab7ce 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -763,3 +763,17 @@ Date: November 2023 Contact: "Chao Yu" Description: It controls to enable/disable IO aware feature for background discard. By default, the value is 1 which indicates IO aware is on. + +What: /sys/fs/f2fs//blkzone_alloc_policy +Date: July 2024 +Contact: "Yuanhong Liao" +Description: The zone UFS we are currently using consists of two parts: + conventional zones and sequential zones. It can be used to control which part + to prioritize for writes, with a default value of 0. + + ======================== ========================================= + value description + blkzone_alloc_policy = 0 Prioritize writing to sequential zones + blkzone_alloc_policy = 1 Only allow writing to sequential zones + blkzone_alloc_policy = 2 Prioritize writing to conventional zones + ======================== ========================================= diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee030a0b093f..ebcb059bd702 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -134,6 +134,12 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -1559,6 +1565,8 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 78c3198a6308..3aee71c9f3c6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2686,17 +2686,40 @@ static int get_new_segment(struct f2fs_sb_info *sbi, goto got_it; } +#ifdef CONFIG_BLK_DEV_ZONED /* * If we format f2fs on zoned storage, let's try to get pinned sections * from beginning of the storage, which should be a conventional one. */ if (f2fs_sb_has_blkzoned(sbi)) { - segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); + /* Prioritize writing to conventional zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) + segno = 0; + else + segno = max(first_zoned_segno(sbi), *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } +#endif find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + +#ifdef CONFIG_BLK_DEV_ZONED + if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { + /* Write only to sequential zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { + hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi)); + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + } else + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + goto out_unlock; + } + } +#endif + if (secno >= MAIN_SECS(sbi)) { secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4d8f38ca6fcd..aa23e4cfd89e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4219,6 +4219,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = true; #ifdef CONFIG_BLK_DEV_ZONED sbi->max_open_zones = UINT_MAX; + sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ; #endif for (i = 0; i < max_devices; i++) { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fee7ee45ceaa..63ff2d1647eb 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -627,6 +627,15 @@ out: } #endif +#ifdef CONFIG_BLK_DEV_ZONED + if (!strcmp(a->attr.name, "blkzone_alloc_policy")) { + if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV) + return -EINVAL; + sbi->blkzone_alloc_policy = t; + return count; + } +#endif + #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || !strcmp(a->attr.name, "compr_saved_block")) { @@ -1033,6 +1042,7 @@ F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif /* STAT_INFO ATTR */ @@ -1187,6 +1197,7 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), -- cgit v1.2.3 From d1e1ff971d1aafeaedda69136e6974e3a8792cbd Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Sun, 21 Jul 2024 22:39:12 -0400 Subject: f2fs: fix macro definition on_f2fs_build_free_nids The macro on_f2fs_build_free_nids accepts a parameter nmi, but it was not used, rather the variable nm_i was directly used, which may be a local variable inside a function that calls the macros. Signed-off-by: Julian Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b72ef96f7e33..58721a55f173 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -20,7 +20,7 @@ #include "iostat.h" #include -#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; -- cgit v1.2.3 From d72750e4a7528b83a07b65335c35b454a98593d4 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Sun, 21 Jul 2024 22:39:13 -0400 Subject: f2fs: fix macro definition stat_inc_cp_count The macro stat_inc_cp_count accepts a parameter si, but it was not used, rather the variable sbi was directly used, which may be a local variable inside a function that calls the macros. Signed-off-by: Julian Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ebcb059bd702..0e181dde79c7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3983,7 +3983,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_call_count(sbi, foreground) \ atomic_inc(&sbi->cp_call_count[(foreground)]) -#define stat_inc_cp_count(si) (F2FS_STAT(sbi)->cp_count++) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) -- cgit v1.2.3 From 1a0bd289a5db1df8df8fab949633a0b8d3f235ee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 11:13:48 +0800 Subject: f2fs: atomic: fix to avoid racing w/ GC Case #1: SQLite App GC Thread Kworker Shrinker - f2fs_ioc_start_atomic_write - f2fs_ioc_commit_atomic_write - f2fs_commit_atomic_write - filemap_write_and_wait_range : write atomic_file's data to cow_inode echo 3 > drop_caches to drop atomic_file's cache. - f2fs_gc - gc_data_segment - move_data_page - set_page_dirty - writepages - f2fs_do_write_data_page : overwrite atomic_file's data to cow_inode - f2fs_down_write(&fi->i_gc_rwsem[WRITE]) - __f2fs_commit_atomic_write - f2fs_up_write(&fi->i_gc_rwsem[WRITE]) Case #2: SQLite App GC Thread Kworker - f2fs_ioc_start_atomic_write - __writeback_single_inode - do_writepages - f2fs_write_cache_pages - f2fs_write_single_data_page - f2fs_do_write_data_page : write atomic_file's data to cow_inode - f2fs_gc - gc_data_segment - move_data_page - set_page_dirty - writepages - f2fs_do_write_data_page : overwrite atomic_file's data to cow_inode - f2fs_ioc_commit_atomic_write In above cases racing in between atomic_write and GC, previous data in atomic_file may be overwrited to cow_file, result in data corruption. This patch introduces PAGE_PRIVATE_ATOMIC_WRITE bit flag in page.private, and use it to indicate that there is last dirty data in atomic file, and the data should be writebacked into cow_file, if the flag is not tagged in page, we should never write data across files. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++++++- fs/f2fs/f2fs.h | 8 +++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6457e5bca9c9..be66b3a0e793 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2650,10 +2650,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + page_private_atomic(fio->page); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -2752,6 +2755,8 @@ got_it: f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page_folio(page), OPU); set_inode_flag(inode, FI_APPEND_WRITE); + if (atomic_commit) + clear_page_private_atomic(page); out_writepage: f2fs_put_dnode(&dn); out: @@ -3721,6 +3726,9 @@ static int f2fs_write_end(struct file *file, set_page_dirty(page); + if (f2fs_is_atomic_file(inode)) + set_page_private_atomic(page); + if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { f2fs_i_size_write(inode, pos + copied); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e181dde79c7..7b00846af915 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1424,7 +1424,8 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION * bit 2 PAGE_PRIVATE_INLINE_INODE * bit 3 PAGE_PRIVATE_REF_RESOURCE - * bit 4- f2fs private data + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE + * bit 5- f2fs private data * * Layout B: lowest bit should be 0 * page.private is a wrapped pointer. @@ -1434,6 +1435,7 @@ enum { PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -2404,14 +2406,17 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); static inline unsigned long get_page_private_data(struct page *page) { @@ -2443,6 +2448,7 @@ static inline void clear_page_private_all(struct page *page) clear_page_private_reference(page); clear_page_private_gcing(page); clear_page_private_inline(page); + clear_page_private_atomic(page); f2fs_bug_on(F2FS_P_SB(page), page_private(page)); } -- cgit v1.2.3 From aaf8c0b9ae042494cb4585883b15c1332de77840 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jun 2024 09:47:27 +0800 Subject: f2fs: reduce expensive checkpoint trigger frequency We may trigger high frequent checkpoint for below case: 1. mkdir /mnt/dir1; set dir1 encrypted 2. touch /mnt/file1; fsync /mnt/file1 3. mkdir /mnt/dir2; set dir2 encrypted 4. touch /mnt/file2; fsync /mnt/file2 ... Although, newly created dir and file are not related, due to commit bbf156f7afa7 ("f2fs: fix lost xattrs of directories"), we will trigger checkpoint whenever fsync() comes after a new encrypted dir created. In order to avoid such performance regression issue, let's record an entry including directory's ino in global cache whenever we update directory's xattr data, and then triggerring checkpoint() only if xattr metadata of target file's parent was updated. This patch updates to cover below no encryption case as well: 1) parent is checkpointed 2) set_xattr(dir) w/ new xnid 3) create(file) 4) fsync(file) Fixes: bbf156f7afa7 ("f2fs: fix lost xattrs of directories") Reported-by: wangzijie Reported-by: Zhiguo Niu Tested-by: Zhiguo Niu Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 3 +++ fs/f2fs/xattr.c | 14 ++++++++++++-- include/trace/events/f2fs.h | 3 ++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b00846af915..f070a425a982 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -291,6 +291,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -1161,6 +1162,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c598cfe5e0ed..e3bea575ddc3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -218,6 +218,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f290fe9327c4..3f3874943679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -629,6 +629,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -772,9 +773,18 @@ retry: if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed794b5fefbe..2851c823095b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ -- cgit v1.2.3 From 96cfeb0389530ae32ade8a48ae3ae1ac3b6c009d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jun 2024 15:17:11 +0800 Subject: f2fs: fix to wait dio completion It should wait all existing dio write IOs before block removal, otherwise, previous direct write IO may overwrite data in the block which may be reused by other inode. Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e3bea575ddc3..67e5c6060403 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1055,6 +1055,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -1891,6 +1898,12 @@ static long f2fs_fallocate(struct file *file, int mode, if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; -- cgit v1.2.3 From 0cac51185e65dc2a20686184e02f3cafc99eb202 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jun 2024 15:15:21 +0800 Subject: f2fs: fix to avoid racing in between read and OPU dio write If lfs mode is on, buffered read may race w/ OPU dio write as below, it may cause buffered read hits unwritten data unexpectly, and for dio read, the race condition exists as well. Thread A Thread B - f2fs_file_write_iter - f2fs_dio_write_iter - __iomap_dio_rw - f2fs_iomap_begin - f2fs_map_blocks - __allocate_data_block - allocated blkaddr #x - iomap_dio_submit_bio - f2fs_file_read_iter - filemap_read - f2fs_read_data_folio - f2fs_mpage_readpages - f2fs_map_blocks : get blkaddr #x - f2fs_submit_read_bio IRQ - f2fs_read_end_io : read IO on blkaddr #x complete IRQ - iomap_dio_bio_end_io : direct write IO on blkaddr #x complete In LFS mode, if there is inflight dio, let's wait for its completion, this policy won't cover all race cases, however it is a tradeoff which avoids abusing lock around IO paths. Fixes: f847c699cff3 ("f2fs: allow out-place-update for direct IO in LFS mode") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 67e5c6060403..0893a67811dd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4613,6 +4613,10 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode))) + inode_dio_wait(inode); + if (f2fs_should_use_dio(inode, iocb, to)) { ret = f2fs_dio_read_iter(iocb, to); } else { -- cgit v1.2.3 From 5bcde45578624c43eb35bb739b03c50459971865 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jun 2024 15:26:20 +0800 Subject: f2fs: get rid of buffer_head use Convert to use folio and related functionality. Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/f2fs.h | 11 ++++++- fs/f2fs/file.c | 1 - fs/f2fs/inode.c | 1 - fs/f2fs/super.c | 93 ++++++++++++++++++++++++++++++++++----------------------- 5 files changed, 66 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index be66b3a0e793..a2c3d39ddfed 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f070a425a982..51fd5063a69c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -2006,6 +2005,16 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % (1 << folio_order(folio)); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0893a67811dd..a458eb080198 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aef57172014f..5d7e4c7f5969 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index aa23e4cfd89e..4d2f3bb8e418 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -3323,24 +3322,42 @@ loff_t max_file_blocks(struct inode *inode) return result; } -static int __f2fs_commit_super(struct buffer_head *bh, - struct f2fs_super_block *super) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, + pgoff_t index, bool update) { - lock_buffer(bh); - if (super) - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_dirty(bh); - unlock_buffer(bh); - + struct bio *bio; /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA; + int ret; + + folio_lock(folio); + folio_wait_writeback(folio); + if (update) + memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi), + sizeof(struct f2fs_super_block)); + folio_mark_dirty(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); + + bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); + + /* it doesn't need to set crypto context for superblock update */ + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio)); + + if (!bio_add_folio(bio, folio, folio_size(folio), 0)) + f2fs_bug_on(sbi, 1); + + ret = submit_bio_wait(bio); + folio_end_writeback(folio); + + return ret; } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); struct super_block *sb = sbi->sb; u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); @@ -3415,7 +3432,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { - err = __f2fs_commit_super(bh, NULL); + err = __f2fs_commit_super(sbi, folio, index, false); res = err ? "failed" : "done"; } f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)", @@ -3428,12 +3445,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, } static int sanity_check_raw_super(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main; block_t total_sections, blocks_per_seg; - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); size_t crc_offset = 0; __u32 crc = 0; @@ -3591,7 +3607,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ - if (sanity_check_area_boundary(sbi, bh)) + if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; return 0; @@ -3938,7 +3954,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, { struct super_block *sb = sbi->sb; int block; - struct buffer_head *bh; + struct folio *folio; struct f2fs_super_block *super; int err = 0; @@ -3947,32 +3963,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, return -ENOMEM; for (block = 0; block < 2; block++) { - bh = sb_bread(sb, block); - if (!bh) { + folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL); + if (IS_ERR(folio)) { f2fs_err(sbi, "Unable to read %dth superblock", block + 1); - err = -EIO; + err = PTR_ERR(folio); *recovery = 1; continue; } /* sanity checking of raw super */ - err = sanity_check_raw_super(sbi, bh); + err = sanity_check_raw_super(sbi, folio, block); if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - brelse(bh); + folio_put(folio); *recovery = 1; continue; } if (!*raw_super) { - memcpy(super, bh->b_data + F2FS_SUPER_OFFSET, + memcpy(super, F2FS_SUPER_BLOCK(folio, block), sizeof(*super)); *valid_super_block = block; *raw_super = super; } - brelse(bh); + folio_put(folio); } /* No valid superblock */ @@ -3986,7 +4002,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { - struct buffer_head *bh; + struct folio *folio; + pgoff_t index; __u32 crc = 0; int err; @@ -4004,22 +4021,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block ? 0 : 1; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; /* write current valid superblock */ - bh = sb_bread(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); return err; } -- cgit v1.2.3 From b722ff8ad66cf9beba971d9eb4bb7b5e6265ae5c Mon Sep 17 00:00:00 2001 From: Wenjie Cheng Date: Fri, 14 Jun 2024 00:48:41 +0000 Subject: Revert "f2fs: use flush command instead of FUA for zoned device" This reverts commit c550e25bca660ed2554cbb48d32b82d0bb98e4b1. Commit c550e25bca660ed2554cbb48d32b82d0bb98e4b1 ("f2fs: use flush command instead of FUA for zoned device") used additional flush command to keep write order. Since Commit dd291d77cc90eb6a86e9860ba8e6e38eebd57d12 ("block: Introduce zone write plugging") has enabled the block layer to handle this order issue, there is no need to use flush command. Signed-off-by: Wenjie Cheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +-- fs/f2fs/node.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a458eb080198..a7a3e6ac7f68 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -375,8 +375,7 @@ sync_nodes: f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) || - (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi))) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 58721a55f173..2fe182f73599 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1674,7 +1674,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ -- cgit v1.2.3 From 6f092b55e1ad2d5c6a82d91dcf93534aeefcf1b9 Mon Sep 17 00:00:00 2001 From: liujinbao1 Date: Wed, 21 Feb 2024 15:32:49 +0800 Subject: f2fs: sysfs: support atgc_enabled When we add "atgc" to the fstab table, ATGC is not immediately enabled. There is a 7-day time threshold, and we can use "atgc_enabled" to show whether ATGC is enabled. Signed-off-by: liujinbao1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/sysfs.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3500920ab7ce..ca61ffcae126 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young candidates whose age is not beyond the threshold, by default it was initialized as 604800 seconds (equals to 7 days). +What: /sys/fs/f2fs//atgc_enabled +Date: Feb 2024 +Contact: "Jinbao Liu" +Description: It represents whether ATGC is on or off. The value is 1 which + indicates that ATGC is on, and 0 indicates that it is off. + What: /sys/fs/f2fs//gc_reclaimed_segments Date: July 2021 Contact: "Daeho Jeong" diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 63ff2d1647eb..ed4bf434207a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -170,6 +170,12 @@ static ssize_t undiscard_blks_show(struct f2fs_attr *a, SM_I(sbi)->dcc_info->undiscard_blks); } +static ssize_t atgc_enabled_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0); +} + static ssize_t gc_mode_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -1082,6 +1088,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(atgc_enabled); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_GENERAL_RO_ATTR(moved_blocks_background); @@ -1211,6 +1218,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_candidate_count), ATTR_LIST(atgc_age_weight), ATTR_LIST(atgc_age_threshold), + ATTR_LIST(atgc_enabled), ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), -- cgit v1.2.3 From f97a11c86cc38da14fd1d44fde767ae2ee1b6ebe Mon Sep 17 00:00:00 2001 From: Zijie Wang Date: Mon, 1 Apr 2024 21:24:08 +0800 Subject: f2fs: use f2fs_get_node_page when write inline data We just need inode page when write inline data, use f2fs_get_node_page() to get it instead of using dnode_of_data, which can eliminate unnecessary struct use. Signed-off-by: Zijie Wang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cca7d448e55c..d76a3bed7093 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -262,33 +262,32 @@ out: int f2fs_write_inline_data(struct inode *inode, struct page *page) { - struct dnode_of_data dn; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *ipage; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); - if (err) - return err; + ipage = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); + f2fs_put_page(ipage, 1); return -EAGAIN; } f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, dn.inode_page), + f2fs_wait_on_page_writeback(ipage, NODE, true, true); + memcpy_from_page(inline_data_addr(inode, ipage), page, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(dn.inode_page); + set_page_dirty(ipage); f2fs_clear_page_cache_dirty_tag(page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(dn.inode_page); - f2fs_put_dnode(&dn); + clear_page_private_inline(ipage); + f2fs_put_page(ipage, 1); return 0; } -- cgit v1.2.3 From d33ebd57b987bb8dc499f4468170c8f85f64fe72 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 26 Jul 2024 14:27:26 +0800 Subject: f2fs: fix to use per-inode maxbytes and cleanup This is a supplement to commit 6d1451bf7f84 ("f2fs: fix to use per-inode maxbytes") for some missed cases, also cleanup redundant code in f2fs_llseek. Cc: Chengguang Xu Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/file.c | 7 ++----- fs/f2fs/verity.c | 5 +++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a2c3d39ddfed..e9570f4e0f21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1938,7 +1938,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; @@ -2063,7 +2063,7 @@ out: static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) - return inode->i_sb->s_maxbytes; + return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a7a3e6ac7f68..a2d466ca2579 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -432,7 +432,7 @@ static bool __found_offset(struct address_space *mapping, static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); struct dnode_of_data dn; pgoff_t pgofs, end_offset; loff_t data_ofs = offset; @@ -514,10 +514,7 @@ fail: static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; - - if (f2fs_compressed_file(inode)) - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); switch (whence) { case SEEK_SET: diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index f7bb0c54502c..83310b5e0422 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -74,7 +74,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; - if (pos + count > inode->i_sb->s_maxbytes) + if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode))) return -EFBIG; while (count) { @@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, pos = le64_to_cpu(dloc.pos); /* Get the descriptor */ - if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + if (pos + size < pos || + pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), -- cgit v1.2.3 From 8fb9f31984bdc0aaa1b0f7c7e7a27bd3137f8744 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 1 Aug 2024 09:33:51 +0800 Subject: f2fs: clean up val{>>,<<}F2FS_BLKSIZE_BITS Use F2FS_BYTES_TO_BLK(bytes) and F2FS_BLK_TO_BYTES(blk) for cleanup Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/debug.c | 2 +- fs/f2fs/file.c | 6 +++--- fs/f2fs/node.c | 4 ++-- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bdd96329dddd..f3d22b8ef2ff 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1551,7 +1551,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + - (i << F2FS_BLKSIZE_BITS), blk + i); + F2FS_BLK_TO_BYTES(i), blk + i); } /* write out checkpoint buffer at block 0 */ diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8b0e1e71b667..546b8ba91261 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -275,7 +275,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks); si->base_mem += NM_I(sbi)->nat_blocks * f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a2d466ca2579..bddcb2cd945a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2979,9 +2979,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, - pos_out >> F2FS_BLKSIZE_BITS, - len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), + F2FS_BYTES_TO_BLK(pos_out), + F2FS_BYTES_TO_BLK(len), false); if (!ret) { if (dst_max_i_size) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2fe182f73599..9e7a6e21f30c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3166,7 +3166,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, - nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); + F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3185,7 +3185,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (IS_ERR(page)) return PTR_ERR(page); - memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), page_address(page), F2FS_BLKSIZE); f2fs_put_page(page, 1); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4d2f3bb8e418..977f038f3557 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3317,7 +3317,7 @@ loff_t max_file_blocks(struct inode *inode) * fit within U32_MAX + 1 data units. */ - result = min(result, (((loff_t)U32_MAX + 1) * 4096) >> F2FS_BLKSIZE_BITS); + result = min(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); return result; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 01bee2b289c2..f17f89a259e2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,7 +19,6 @@ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -28,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -- cgit v1.2.3 From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Aug 2024 16:07:16 +0200 Subject: f2fs: Require FMODE_WRITE for atomic write ioctls The F2FS ioctls for starting and committing atomic writes check for inode_owner_or_capable(), but this does not give LSMs like SELinux or Landlock an opportunity to deny the write access - if the caller's FSUID matches the inode's UID, inode_owner_or_capable() immediately returns true. There are scenarios where LSMs want to deny a process the ability to write particular files, even files that the FSUID of the process owns; but this can currently partially be bypassed using atomic write ioctls in two ways: - F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can truncate an inode to size 0 - F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert changes another process concurrently made to a file Fix it by requiring FMODE_WRITE for these operations, just like for F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these ioctls when intending to write into the file, that seems unlikely to break anything. Fixes: 88b88a667971 ("f2fs: support atomic writes") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Reviewed-by: Chao Yu Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bddcb2cd945a..2c591fbc75a9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; -- cgit v1.2.3 From 8c1b787938fd86bab27a1492fa887408c75fec2b Mon Sep 17 00:00:00 2001 From: Yeongjin Gil Date: Tue, 13 Aug 2024 16:32:44 +0900 Subject: f2fs: Create COW inode from parent dentry for atomic write The i_pino in f2fs_inode_info has the previous parent's i_ino when inode was renamed, which may cause f2fs_ioc_start_atomic_write to fail. If file_wrong_pino is true and i_nlink is 1, then to find a valid pino, we should refer to the dentry from inode. To resolve this issue, let's get parent inode using parent dentry directly. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Reviewed-by: Sungjong Seo Reviewed-by: Sunmin Jeong Signed-off-by: Yeongjin Gil Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2c591fbc75a9..082e81ba09b9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2127,7 +2127,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; @@ -2180,15 +2179,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); if (ret) { f2fs_up_write(&fi->i_gc_rwsem[WRITE]); goto out; -- cgit v1.2.3 From a4d7f2b3238fd5f76b9e6434a0bd5d2e29049cff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Aug 2024 22:12:42 +0800 Subject: f2fs: fix to wait page writeback before setting gcing flag Soft IRQ Thread - f2fs_write_end_io - f2fs_defragment_range - set_page_private_gcing - type = WB_DATA_TYPE(page, false); : assign type w/ F2FS_WB_CP_DATA due to page_private_gcing() is true - dec_page_count() w/ wrong type - end_page_writeback() Value of F2FS_WB_CP_DATA reference count may become negative under above race condition, the root cause is we missed to wait page writeback before setting gcing page private flag, let's fix it. Fixes: 2d1fe8a86bf5 ("f2fs: fix to tag gcing flag on page during file defragment") Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 082e81ba09b9..73f70a7a2567 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2807,6 +2807,8 @@ do_map: goto clear_out; } + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -4207,6 +4209,8 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) /* It will never fail, when page has pinned above */ f2fs_bug_on(F2FS_I_SB(inode), !page); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); -- cgit v1.2.3 From ebd3309aec6271c4616573b0cb83ea25e623070a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 7 Aug 2024 18:24:35 +0800 Subject: f2fs: atomic: fix to truncate pagecache before on-disk metadata truncation We should always truncate pagecache while truncating on-disk data. Fixes: a46bebd502fe ("f2fs: synchronize atomic write aborts") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 73f70a7a2567..5fb8e4242c7a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2195,6 +2195,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); if (ret) { f2fs_up_write(&fi->i_gc_rwsem[WRITE]); -- cgit v1.2.3 From c7f114d864ac91515bb07ac271e9824a20f5ed95 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 30 Jul 2024 09:08:55 +0800 Subject: f2fs: fix to avoid use-after-free in f2fs_stop_gc_thread() syzbot reports a f2fs bug as below: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_report+0xe8/0x550 mm/kasan/report.c:491 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_fetch_add_relaxed include/linux/atomic/atomic-instrumented.h:252 [inline] __refcount_add include/linux/refcount.h:184 [inline] __refcount_inc include/linux/refcount.h:241 [inline] refcount_inc include/linux/refcount.h:258 [inline] get_task_struct include/linux/sched/task.h:118 [inline] kthread_stop+0xca/0x630 kernel/kthread.c:704 f2fs_stop_gc_thread+0x65/0xb0 fs/f2fs/gc.c:210 f2fs_do_shutdown+0x192/0x540 fs/f2fs/file.c:2283 f2fs_ioc_shutdown fs/f2fs/file.c:2325 [inline] __f2fs_ioctl+0x443a/0xbe60 fs/f2fs/file.c:4325 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is below race condition, it may cause use-after-free issue in sbi->gc_th pointer. - remount - f2fs_remount - f2fs_stop_gc_thread - kfree(gc_th) - f2fs_ioc_shutdown - f2fs_do_shutdown - f2fs_stop_gc_thread - kthread_stop(gc_th->f2fs_gc_task) : sbi->gc_thread = NULL; We will call f2fs_do_shutdown() in two paths: - for f2fs_ioc_shutdown() path, we should grab sb->s_umount semaphore for fixing. - for f2fs_shutdown() path, it's safe since caller has already grabbed sb->s_umount semaphore. Reported-by: syzbot+1a8e2b31f2ac9bd3d148@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/0000000000005c7ccb061e032b9b@google.com Fixes: 7950e9ac638e ("f2fs: stop gc/discard thread after fs shutdown") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 11 +++++++++-- fs/f2fs/super.c | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51fd5063a69c..0e0189084462 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3508,7 +3508,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly); + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5fb8e4242c7a..50075f8d5e9b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2297,7 +2297,7 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) } int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly) + bool readonly, bool need_lock) { struct super_block *sb = sbi->sb; int ret = 0; @@ -2344,12 +2344,19 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, if (readonly) goto out; + /* grab sb->s_umount to avoid racing w/ remount() */ + if (need_lock) + down_read(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_read(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: @@ -2386,7 +2393,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } } - ret = f2fs_do_shutdown(sbi, in, readonly); + ret = f2fs_do_shutdown(sbi, in, readonly, true); if (need_drop) mnt_drop_write_file(filp); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 977f038f3557..1c9316788a16 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2560,7 +2560,7 @@ restore_opts: static void f2fs_shutdown(struct super_block *sb) { - f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); } #ifdef CONFIG_QUOTA -- cgit v1.2.3 From 43aec4d01bd2ce961817a777b3846f8318f398e4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 16 Aug 2024 13:07:03 +0900 Subject: f2fs: check discard support for conventional zones As the helper function f2fs_bdev_support_discard() shows, f2fs checks if the target block devices support discard by calling bdev_max_discard_sectors() and bdev_is_zoned(). This check works well for most cases, but it does not work for conventional zones on zoned block devices. F2fs assumes that zoned block devices support discard, and calls __submit_discard_cmd(). When __submit_discard_cmd() is called for sequential write required zones, it works fine since __submit_discard_cmd() issues zone reset commands instead of discard commands. However, when __submit_discard_cmd() is called for conventional zones, __blkdev_issue_discard() is called even when the devices do not support discard. The inappropriate __blkdev_issue_discard() call was not a problem before the commit 30f1e7241422 ("block: move discard checks into the ioctl handler") because __blkdev_issue_discard() checked if the target devices support discard or not. If not, it returned EOPNOTSUPP. After the commit, __blkdev_issue_discard() no longer checks it. It always returns zero and sets NULL to the given bio pointer. This NULL pointer triggers f2fs_bug_on() in __submit_discard_cmd(). The BUG is recreated with the commands below at the umount step, where /dev/nullb0 is a zoned null_blk with 5GB total size, 128MB zone size and 10 conventional zones. $ mkfs.f2fs -f -m /dev/nullb0 $ mount /dev/nullb0 /mnt $ for ((i=0;i<5;i++)); do dd if=/dev/zero of=/mnt/test bs=65536 count=1600 conv=fsync; done $ umount /mnt To fix the BUG, avoid the inappropriate __blkdev_issue_discard() call. When discard is requested for conventional zones, check if the device supports discard or not. If not, return EOPNOTSUPP. Fixes: 30f1e7241422 ("block: move discard checks into the ioctl handler") Cc: stable@vger.kernel.org Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Damien Le Moal Reviewed-by: Chao Yu Reviewed-by: Christoph Hellwig Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3aee71c9f3c6..ed070b9ef91b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1282,6 +1282,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, wait_list, issued); return 0; } + + /* + * Issue discard for conventional zones only if the device + * supports discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; } #endif -- cgit v1.2.3 From f785cec298c95d00058560c0715233294a04b8f3 Mon Sep 17 00:00:00 2001 From: Yeongjin Gil Date: Mon, 19 Aug 2024 17:34:30 +0900 Subject: f2fs: compress: don't redirty sparse cluster during {,de}compress In f2fs_do_write_data_page, when the data block is NULL_ADDR, it skips writepage considering that it has been already truncated. This results in an infinite loop as the PAGECACHE_TAG_TOWRITE tag is not cleared during the writeback process for a compressed file including NULL_ADDR in compress_mode=user. This is the reproduction process: 1. dd if=/dev/zero bs=4096 count=1024 seek=1024 of=testfile 2. f2fs_io compress testfile 3. dd if=/dev/zero bs=4096 count=1 conv=notrunc of=testfile 4. f2fs_io decompress testfile To prevent the problem, let's check whether the cluster is fully allocated before redirty its pages. Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE") Reviewed-by: Sungjong Seo Reviewed-by: Sunmin Jeong Tested-by: Jaewook Kim Signed-off-by: Yeongjin Gil Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 36 ++++++++++++++++++++++++++++-------- fs/f2fs/f2fs.h | 12 ++++++++++++ fs/f2fs/file.c | 39 +++++++++++++++++++++------------------ 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 990b93689b46..f55d54bb12f4 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -945,7 +945,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; int count, i; - for (i = 1, count = 1; i < cluster_size; i++) { + for (i = 0, count = 0; i < cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -956,8 +956,8 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, return count; } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr_blks) +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) { struct dnode_of_data dn; unsigned int start_idx = cluster_idx << @@ -978,10 +978,12 @@ static int __f2fs_cluster_blocks(struct inode *inode, } if (dn.data_blkaddr == COMPRESS_ADDR) { - if (compr_blks) - ret = __f2fs_get_cluster_blocks(inode, &dn); - else + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -991,7 +993,16 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } /* return whether cluster is compressed one or not */ @@ -999,7 +1010,16 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e0189084462..ec82bde0c51d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4302,6 +4302,11 @@ static inline bool f2fs_meta_inode_gc_required(struct inode *inode) * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; bool f2fs_is_compressed_page(struct page *page); struct page *f2fs_compress_control_page(struct page *page); int f2fs_prepare_compress_overwrite(struct inode *inode, @@ -4328,6 +4333,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); @@ -4414,6 +4420,12 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 50075f8d5e9b..0bfcc1bf5832 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4236,9 +4236,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4273,10 +4272,15 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; + + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4286,9 +4290,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4315,9 +4316,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4351,10 +4352,15 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4364,9 +4370,6 @@ static int f2fs_ioc_compress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; -- cgit v1.2.3 From b2c160f4f3cfe9334b3e6bb52cb89a3a20ef0385 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 16 Aug 2024 09:13:42 +0800 Subject: f2fs: atomic: fix to forbid dio in atomic_file atomic write can only be used via buffered IO, let's fail direct IO on atomic_file and return -EOPNOTSUPP. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0bfcc1bf5832..81d0b86c5718 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2162,6 +2162,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[READ]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2171,10 +2172,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { @@ -2183,10 +2182,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct inode *dir = d_inode(dentry->d_parent); ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); @@ -2200,10 +2197,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; } f2fs_write_inode(inode, NULL); @@ -2222,7 +2217,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) } f2fs_i_size_write(fi->cow_inode, isize); +out_unlock: + f2fs_up_write(&fi->i_gc_rwsem[READ]); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (ret) + goto out; f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; @@ -4570,6 +4569,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_down_read(&fi->i_gc_rwsem[READ]); } + /* dio is not compatible w/ atomic file */ + if (f2fs_is_atomic_file(inode)) { + f2fs_up_read(&fi->i_gc_rwsem[READ]); + ret = -EOPNOTSUPP; + goto out; + } + /* * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of * the higher-level function iomap_dio_rw() in order to ensure that the @@ -4985,6 +4991,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Determine whether we will do a direct write or a buffered write. */ dio = f2fs_should_use_dio(inode, iocb, from); + /* dio is not compatible w/ atomic write */ + if (dio && f2fs_is_atomic_file(inode)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); preallocated = f2fs_preallocate_blocks(iocb, from, dio); -- cgit v1.2.3 From f7a678bbe5a8f22cfcef5369757cc9b95f73e027 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 15 Aug 2024 08:47:28 +0800 Subject: f2fs: Use sysfs_emit_at() to simplify code This file already uses sysfs_emit(). So be consistent and also use sysfs_emit_at(). This slightly simplifies the code and makes it more readable. Reviewed-by: Chao Yu Signed-off-by: Christophe JAILLET Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ed4bf434207a..654a541dcc45 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -188,50 +188,50 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (f2fs_sb_has_encrypt(sbi)) - len += scnprintf(buf, PAGE_SIZE - len, "%s", + len += sysfs_emit_at(buf, len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_readonly(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -329,17 +329,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += scnprintf(buf + len, PAGE_SIZE - len, - "cold file extension:\n"); + len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); - len += scnprintf(buf + len, PAGE_SIZE - len, - "hot file extension:\n"); + len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + return len; } -- cgit v1.2.3 From 10de741a5d7ab110eb026ad214eea2c5e5fe7e9c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:23 +0800 Subject: f2fs: convert f2fs_compress_ctx_add_page() to use folio onvert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 14 +++++++------- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index f55d54bb12f4..a96c7b16fe19 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -160,17 +160,17 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->cluster_idx = NULL_CLUSTER; } -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) { unsigned int cluster_ofs; - if (!f2fs_cluster_can_merge_page(cc, page->index)) + if (!f2fs_cluster_can_merge_page(cc, folio->index)) f2fs_bug_on(F2FS_I_SB(cc->inode), 1); - cluster_ofs = offset_in_cluster(cc, page->index); - cc->rpages[cluster_ofs] = page; + cluster_ofs = offset_in_cluster(cc, folio->index); + cc->rpages[cluster_ofs] = folio_page(folio, 0); cc->nr_rpages++; - cc->cluster_idx = cluster_idx(cc, page->index); + cc->cluster_idx = cluster_idx(cc, folio->index); } #ifdef CONFIG_F2FS_FS_LZO @@ -1113,7 +1113,7 @@ retry: if (PageUptodate(page)) f2fs_put_page(page, 1); else - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); } if (!f2fs_cluster_is_empty(cc)) { @@ -1143,7 +1143,7 @@ retry: } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); if (!PageUptodate(page)) { release_and_retry: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e9570f4e0f21..100b6526717f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2429,7 +2429,7 @@ static int f2fs_mpage_readpages(struct inode *inode, if (ret) goto set_error_page; - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); goto next_page; read_single_page: @@ -3161,7 +3161,7 @@ continue_unlock: #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ec82bde0c51d..569d9d2faae1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4327,7 +4327,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); int f2fs_write_multi_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, -- cgit v1.2.3 From aec5755951b74e3bbb5ddee39ac142a788547854 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:24 +0800 Subject: f2fs: convert f2fs_vm_page_mkwrite() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 81d0b86c5718..a8d153eb0a95 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -53,7 +53,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -85,7 +85,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - int ret = f2fs_is_compressed_cluster(inode, page->index); + int ret = f2fs_is_compressed_cluster(inode, folio->index); if (ret < 0) { err = ret; @@ -105,11 +105,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page))) { - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || + !folio_test_uptodate(folio))) { + folio_unlock(folio); err = -EFAULT; goto out_sem; } @@ -117,9 +117,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - err = f2fs_get_block_locked(&dn, page->index); + err = f2fs_get_block_locked(&dn, folio->index); } else { - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); f2fs_put_dnode(&dn); if (f2fs_is_pinned_file(inode) && !__is_valid_data_blkaddr(dn.data_blkaddr)) @@ -127,11 +127,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } if (err) { - unlock_page(page); + folio_unlock(folio); goto out_sem; } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -139,18 +139,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto out_sem; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_SHIFT) > + if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { loff_t offset; offset = i_size_read(inode) & ~PAGE_MASK; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); } - set_page_dirty(page); + folio_mark_dirty(folio); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); @@ -162,7 +162,7 @@ out_sem: out: ret = vmf_fs_error(err); - trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret); + trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); return ret; } -- cgit v1.2.3 From fd3a11aff4bef4df7dff25c71e4c51fe8d93e2e7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:25 +0800 Subject: f2fs: convert f2fs_clear_page_cache_dirty_tag() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 100b6526717f..0df5a0c66ede 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4117,9 +4117,8 @@ const struct address_space_operations f2fs_dblock_aops = { .swap_deactivate = f2fs_swap_deactivate, }; -void f2fs_clear_page_cache_dirty_tag(struct page *page) +void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { - struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; unsigned long flags; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 14900ca8a9ff..5fcc952107e9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -868,7 +868,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, page->index, page->index + 1)) { - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); ClearPageUptodate(page); clear_page_private_all(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 569d9d2faae1..fafc648a6642 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3890,7 +3890,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); -void f2fs_clear_page_cache_dirty_tag(struct page *page); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d76a3bed7093..98d122164cd1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -281,7 +281,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) page, 0, MAX_INLINE_DATA(inode)); set_page_dirty(ipage); - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9e7a6e21f30c..20518c12f5b7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -123,7 +123,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } -- cgit v1.2.3 From b084662139ae964073bfdbcf633f3ca9fa1a6d27 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:26 +0800 Subject: f2fs: convert f2fs_write_inline_data() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 10 +++++----- fs/f2fs/node.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0df5a0c66ede..6d2a37db0c9e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2866,7 +2866,7 @@ write: err = -EAGAIN; if (f2fs_has_inline_data(inode)) { - err = f2fs_write_inline_data(inode, page); + err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fafc648a6642..12f6589c83b9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4185,7 +4185,7 @@ int f2fs_read_inline_data(struct inode *inode, struct folio *folio); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); -int f2fs_write_inline_data(struct inode *inode, struct page *page); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 98d122164cd1..005babf1bed1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -260,7 +260,7 @@ out: return err; } -int f2fs_write_inline_data(struct inode *inode, struct page *page) +int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *ipage; @@ -274,14 +274,14 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return -EAGAIN; } - f2fs_bug_on(F2FS_I_SB(inode), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); f2fs_wait_on_page_writeback(ipage, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, ipage), - page, 0, MAX_INLINE_DATA(inode)); + memcpy_from_folio(inline_data_addr(inode, ipage), + folio, 0, MAX_INLINE_DATA(inode)); set_page_dirty(ipage); - f2fs_clear_page_cache_dirty_tag(page_folio(page)); + f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 20518c12f5b7..2b764c86aacc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1535,7 +1535,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!clear_page_dirty_for_io(page)) goto page_out; - ret = f2fs_write_inline_data(inode, page); + ret = f2fs_write_inline_data(inode, page_folio(page)); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) -- cgit v1.2.3 From 46a75ef866030b3500bf15fd3400c6779eb33752 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:27 +0800 Subject: f2fs: convert f2fs_write_single_data_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- fs/f2fs/data.c | 29 +++++++++++++++-------------- fs/f2fs/f2fs.h | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index a96c7b16fe19..630bb78655d5 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1543,7 +1543,8 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, + ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), + &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d2a37db0c9e..0779e222f709 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2764,7 +2764,7 @@ out: return err; } -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, @@ -2772,12 +2772,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, int compr_blocks, bool allow_balance) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; + struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); @@ -2801,11 +2802,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .last_block = last_block, }; - trace_f2fs_writepage(page_folio(page), DATA); + trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); + mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages for keeping lastest * directory structure. @@ -2823,7 +2824,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || + if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; @@ -2833,10 +2834,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) + if ((folio->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { @@ -2896,7 +2897,7 @@ done: out: inode_dec_dirty_pages(inode); if (err) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); clear_page_private_gcing(page); } @@ -2906,7 +2907,7 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); @@ -2924,7 +2925,7 @@ out: return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). @@ -2933,7 +2934,7 @@ redirty_out: */ if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); + folio_unlock(folio); return err; } @@ -2955,7 +2956,7 @@ static int f2fs_write_data_page(struct page *page, out: #endif - return f2fs_write_single_data_page(page, NULL, NULL, NULL, + return f2fs_write_single_data_page(page_folio(page), NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } @@ -3165,7 +3166,7 @@ continue_unlock: continue; } #endif - ret = f2fs_write_single_data_page(&folio->page, + ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 12f6589c83b9..994661d97635 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3881,7 +3881,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, -- cgit v1.2.3 From e55561d278f2b03c0131e5408df4ab9fed2ae51d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:28 +0800 Subject: f2fs: convert f2fs_do_write_meta_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f3d22b8ef2ff..d73d4d05313e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -362,7 +362,7 @@ static int __f2fs_write_meta_page(struct page *page, if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - f2fs_do_write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, page_folio(page), io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 994661d97635..2c1664f951e9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3732,7 +3732,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type); void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); void f2fs_outplace_write_data(struct dnode_of_data *dn, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed070b9ef91b..e3cc2706f544 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3811,7 +3811,7 @@ out: f2fs_up_read(&fio->sbi->io_order_lock); } -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -3820,20 +3820,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, - .old_blkaddr = page->index, - .new_blkaddr = page->index, - .page = page, + .old_blkaddr = folio->index, + .new_blkaddr = folio->index, + .page = folio_page(folio, 0), .encrypted_page = NULL, .in_list = 0, }; - if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_submit_page_write(&fio); - stat_inc_meta_count(sbi, page->index); + stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } -- cgit v1.2.3 From 138a762e7f45474c33dfb269f3e6641741be1508 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:29 +0800 Subject: f2fs: convert __f2fs_write_meta_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d73d4d05313e..ffaf295a198a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -345,30 +345,31 @@ static int __f2fs_write_meta_page(struct page *page, enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); - trace_f2fs_writepage(page_folio(page), META); + trace_f2fs_writepage(folio, META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); + folio_unlock(folio); return 0; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) + if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - f2fs_do_write_meta_page(sbi, page_folio(page), io_type); + f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); -- cgit v1.2.3 From 3981e94285886cd712b563a82a2e99f902510c78 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Aug 2024 21:50:59 +0800 Subject: f2fs: convert f2fs_read_multi_pages() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0779e222f709..4f4e76c33611 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2207,19 +2207,22 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; + struct folio *folio; if (!page) continue; - if ((sector_t)page->index >= last_block_in_file) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - } else if (!PageUptodate(page)) { + + folio = page_folio(page); + if ((sector_t)folio->index >= last_block_in_file) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + } else if (!folio_test_uptodate(folio)) { continue; } - unlock_page(page); + folio_unlock(folio); if (for_write) - put_page(page); + folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } @@ -2279,7 +2282,7 @@ skip_reading_dnode: } for (i = 0; i < cc->nr_cpages; i++) { - struct page *page = dic->cpages[i]; + struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; @@ -2289,7 +2292,8 @@ skip_reading_dnode: f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, page, blkaddr)) { + if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), + blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; @@ -2299,7 +2303,7 @@ skip_reading_dnode: if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; @@ -2308,7 +2312,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, f2fs_ra_op_flags(rac), - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); f2fs_decompress_end_io(dic, ret, true); @@ -2318,7 +2322,7 @@ submit_and_realloc: } } - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); -- cgit v1.2.3 From 763a0dc792f9d7c79c1d85415c6cebc1f79bba16 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Aug 2024 22:13:31 +0800 Subject: f2fs: convert f2fs_handle_page_eio() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/node.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ffaf295a198a..7f76460b721f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -99,7 +99,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page->index, META); + f2fs_handle_page_eio(sbi, page_folio(page), META); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c1664f951e9..460c05191c28 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4678,9 +4678,11 @@ static inline void f2fs_io_schedule_timeout(long timeout) io_schedule_timeout(timeout); } -static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, - enum page_type type) +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) { + pgoff_t ofs = folio->index; + if (unlikely(f2fs_cp_error(sbi))) return; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2b764c86aacc..7beaa3690e03 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1492,7 +1492,7 @@ out_err: out_put_err: /* ENOENT comes from read_node_page which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page->index, NODE); + f2fs_handle_page_eio(sbi, page_folio(page), NODE); f2fs_put_page(page, 1); return ERR_PTR(err); } -- cgit v1.2.3 From 1e5df24c1721d077d9a397982c8b68e486243daf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:54:59 +0800 Subject: f2fs: convert f2fs_submit_page_read() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4f4e76c33611..0655fddfc4ba 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1086,7 +1086,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static int f2fs_submit_page_read(struct inode *inode, struct page *page, +static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { @@ -1094,14 +1094,14 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) { iostat_update_and_unbind_ctx(bio); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); @@ -1269,7 +1269,7 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; @@ -3668,8 +3668,8 @@ repeat: goto fail; } err = f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, page, - blkaddr, 0, true); + F2FS_I(inode)->cow_inode : inode, + page_folio(page), blkaddr, 0, true); if (err) goto fail; -- cgit v1.2.3 From 12e61ce27137dc42c1f2fbd704d0285f8582df62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:00 +0800 Subject: f2fs: convert f2fs_write_begin() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0655fddfc4ba..85ac05c3655a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3378,11 +3378,11 @@ void f2fs_write_failed(struct inode *inode, loff_t to) } static int prepare_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { - struct inode *inode = page->mapping->host; - pgoff_t index = page->index; + struct inode *inode = folio->mapping->host; + pgoff_t index = folio->index; struct dnode_of_data dn; struct page *ipage; bool locked = false; @@ -3419,13 +3419,13 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page_folio(page), ipage); + f2fs_do_read_inline_data(folio, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); goto out; } - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } @@ -3518,12 +3518,12 @@ unlock_out: } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned int len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; - pgoff_t index = page->index; + pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; @@ -3566,6 +3566,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; + struct folio *folio; pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; bool need_balance = false; bool use_cow = false; @@ -3625,22 +3626,23 @@ repeat: /* TODO: cluster can be compressed due to race with .writepage */ *pagep = page; + folio = page_folio(page); if (f2fs_is_atomic_file(inode)) - err = prepare_atomic_write_begin(sbi, page, pos, len, + err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else - err = prepare_write_begin(sbi, page, pos, len, + err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) goto fail; if (need_balance && !IS_NOQUOTA(inode) && has_not_enough_free_secs(sbi, 0, 0)) { - unlock_page(page); + folio_unlock(folio); f2fs_balance_fs(sbi, true); - lock_page(page); - if (page->mapping != mapping) { + folio_lock(folio); + if (folio->mapping != mapping) { /* The page got truncated from under us */ f2fs_put_page(page, 1); goto repeat; @@ -3649,18 +3651,18 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false, true); - if (len == PAGE_SIZE || PageUptodate(page)) + if (len == PAGE_SIZE || folio_test_uptodate(folio)) return 0; if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { - zero_user_segment(page, len, PAGE_SIZE); + folio_zero_segment(folio, len, folio_size(folio)); return 0; } if (blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { @@ -3669,16 +3671,16 @@ repeat: } err = f2fs_submit_page_read(use_cow ? F2FS_I(inode)->cow_inode : inode, - page_folio(page), blkaddr, 0, true); + folio, blkaddr, 0, true); if (err) goto fail; - lock_page(page); - if (unlikely(page->mapping != mapping)) { + folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } - if (unlikely(!PageUptodate(page))) { + if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; goto fail; } -- cgit v1.2.3 From 3c86d1eec7891c53ea9c71d7a2970c248413a855 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:01 +0800 Subject: f2fs: convert f2fs_write_end() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 85ac05c3655a..e114d738b6b4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3698,7 +3698,8 @@ static int f2fs_write_end(struct file *file, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(page); + struct inode *inode = folio->mapping->host; trace_f2fs_write_end(inode, pos, len, copied); @@ -3707,17 +3708,17 @@ static int f2fs_write_end(struct file *file, * should be PAGE_SIZE. Otherwise, we treat it with zero copied and * let generic_perform_write() try to copy data again through copied=0. */ - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { if (unlikely(copied != len)) copied = 0; else - SetPageUptodate(page); + folio_mark_uptodate(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION /* overwrite compressed file */ if (f2fs_compressed_file(inode) && fsdata) { - f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_compress_write_end(inode, fsdata, folio->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (pos + copied > i_size_read(inode) && @@ -3730,10 +3731,10 @@ static int f2fs_write_end(struct file *file, if (!copied) goto unlock_out; - set_page_dirty(page); + folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) - set_page_private_atomic(page); + set_page_private_atomic(folio_page(folio, 0)); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { -- cgit v1.2.3 From 1efc78310c9d25bdc2020202a50c012025602dd3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:02 +0800 Subject: f2fs: convert f2fs_set_compressed_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 630bb78655d5..e642c869db4a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -90,11 +90,13 @@ bool f2fs_is_compressed_page(struct page *page) static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - attach_page_private(page, (void *)data); + struct folio *folio = page_folio(page); + + folio_attach_private(folio, (void *)data); /* i_crypto_info and iv index */ - page->index = index; - page->mapping = inode->i_mapping; + folio->index = index; + folio->mapping = inode->i_mapping; } static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) -- cgit v1.2.3 From bcf4d2dd840e5f4401a2d9425bd0ce94cab12336 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:03 +0800 Subject: f2fs: convert f2fs_do_write_data_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e114d738b6b4..c57ddee2c7c5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2648,8 +2648,8 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct page *page = fio->page; - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(fio->page); + struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; @@ -2658,14 +2658,14 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* Use COW inode to make dnode_of_data for atomic write */ atomic_commit = f2fs_is_atomic_file(inode) && - page_private_atomic(fio->page); + page_private_atomic(folio_page(folio, 0)); if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_read_extent_cache_block(inode, page->index, + f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) @@ -2680,7 +2680,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; @@ -2688,8 +2688,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { - ClearPageUptodate(page); - clear_page_private_gcing(page); + folio_clear_uptodate(folio); + clear_page_private_gcing(folio_page(folio, 0)); goto out_writepage; } got_it: @@ -2715,7 +2715,7 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -2723,11 +2723,11 @@ got_it: if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - end_page_writeback(page); + folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(page_folio(page), IPU); + trace_f2fs_do_write_data_page(folio, IPU); return err; } @@ -2749,17 +2749,17 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page_folio(page), OPU); + trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (atomic_commit) - clear_page_private_atomic(page); + clear_page_private_atomic(folio_page(folio, 0)); out_writepage: f2fs_put_dnode(&dn); out: -- cgit v1.2.3 From 2ac0aa31a15132b88de25f4b687ea24efd81c823 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:04 +0800 Subject: f2fs: convert f2fs_write_data_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c57ddee2c7c5..e69097267b99 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2945,22 +2945,23 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); #ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) goto out; if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, page->index)) { - redirty_page_for_writepage(wbc, page); + if (f2fs_is_compressed_cluster(inode, folio->index)) { + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } } out: #endif - return f2fs_write_single_data_page(page_folio(page), NULL, NULL, NULL, + return f2fs_write_single_data_page(folio, NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } -- cgit v1.2.3 From 4deccfbdc4464b66fe50d71e9d64240483efb07d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:05 +0800 Subject: f2fs: convert __write_node_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7beaa3690e03..aeda3eecc9e2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1608,6 +1608,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { @@ -1624,15 +1625,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page_folio(page), NODE); + trace_f2fs_writepage(folio, NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1646,7 +1647,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); + f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; @@ -1660,10 +1661,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1684,7 +1685,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, *seq_id = seq; } - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); @@ -1697,7 +1698,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, NODE); @@ -1711,7 +1712,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } -- cgit v1.2.3 From 2eaa98e5203979fd5fa6386f307f4f8277a72593 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:06 +0800 Subject: f2fs: convert read_node_page() to use folio Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index aeda3eecc9e2..f5e5abce695b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1369,6 +1369,7 @@ fail: */ static int read_node_page(struct page *page, blk_opf_t op_flags) { + struct folio *folio = page_folio(page); struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { @@ -1381,21 +1382,21 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) }; int err; - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!f2fs_inode_chksum_verify(sbi, page)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -EFSBADCRC; } return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni, false); + err = f2fs_get_node_info(sbi, folio->index, &ni, false); if (err) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -ENOENT; } -- cgit v1.2.3 From 5697e94daab9e40cbf1fb368781d110f22fb6760 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Aug 2024 22:55:07 +0800 Subject: f2fs: get rid of page->index Convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1] https://lore.kernel.org/all/Zp8fgUSIBGQ1TN0D@casper.infradead.org/ Cc: Matthew Wilcox Signed-off-by: Chao Yu Reviewed-by: Li Zetao Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/dir.c | 3 ++- fs/f2fs/inode.c | 3 ++- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 3 ++- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index e642c869db4a..7f26440e8595 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -881,7 +881,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ - if (page->index >= nr_pages) + if (page_folio(page)->index >= nr_pages) return true; } return false; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e69097267b99..c6d688208f8b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -354,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page->index != nid_of_node(page)); + page_folio(page)->index != nid_of_node(page)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) @@ -703,7 +703,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio = __bio_alloc(fio, 1); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -802,7 +802,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, fio->page->mapping->host, - fio->page->index, fio) && + page_folio(fio->page)->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; @@ -902,7 +902,7 @@ alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { @@ -995,13 +995,13 @@ next: (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, - bio_page->index, fio))) + page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, - bio_page->index, fio, GFP_NOIO); + page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 5fcc952107e9..1136539a57a8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -842,6 +842,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + pgoff_t index = page_folio(page)->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -867,7 +868,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); if (bit_pos == NR_DENTRY_IN_BLOCK && - !f2fs_truncate_hole(dir, page->index, page->index + 1)) { + !f2fs_truncate_hole(dir, index, index + 1)) { f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); ClearPageUptodate(page); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 5d7e4c7f5969..b2d5c3ef8e24 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -174,7 +174,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + page_folio(page)->index, ino_of_node(page), + provided, calculated); return provided == calculated; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f5e5abce695b..59b13ff243fa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -919,7 +919,7 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = dn->node_page->index; + index = page_folio(dn->node_page)->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), @@ -1869,7 +1869,7 @@ continue_unlock: } if (!ret && atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + ino, page_folio(last_page)->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e3cc2706f544..314be0d7816d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3552,7 +3552,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, fio->page->index); + type = __get_age_segment_type(inode, + page_folio(fio->page)->index); if (type != NO_CHECK_TYPE) return type; -- cgit v1.2.3 From fccaa81de87e80b1809906f7e438e5766fbdc172 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 4 Sep 2024 08:33:06 -0700 Subject: f2fs: prevent atomic file from being dirtied before commit Keep atomic file clean while updating and make it dirtied during commit in order to avoid unnecessary and excessive inode updates in the previous fix. Fixes: 4bf78322346f ("f2fs: mark inode dirty for FI_ATOMIC_COMMITTED flag") Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 5 +++++ fs/f2fs/segment.c | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 460c05191c28..8d850192dfdf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -808,6 +808,7 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ @@ -3067,7 +3068,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: - case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b2d5c3ef8e24..1ed86df343a5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -34,6 +34,11 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (f2fs_inode_dirtied(inode, sync)) return; + if (f2fs_is_atomic_file(inode)) { + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return; + } + mark_inode_dirty_sync(inode); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 314be0d7816d..5ec0d9f655f8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -199,6 +199,10 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; @@ -366,6 +370,10 @@ out: } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); -- cgit v1.2.3 From 884ee6dc85b959bc152f15bca80c30f06069e6c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Sep 2024 14:27:24 +0800 Subject: f2fs: get rid of online repaire on corrupted directory syzbot reports a f2fs bug as below: kernel BUG at fs/f2fs/inode.c:896! RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Call Trace: evict+0x532/0x950 fs/inode.c:704 dispose_list fs/inode.c:747 [inline] evict_inodes+0x5f9/0x690 fs/inode.c:797 generic_shutdown_super+0x9d/0x2d0 fs/super.c:627 kill_block_super+0x44/0x90 fs/super.c:1696 kill_f2fs_super+0x344/0x690 fs/f2fs/super.c:4898 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:228 ptrace_notify+0x2d2/0x380 kernel/signal.c:2402 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc6/0x190 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x279/0x370 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Online repaire on corrupted directory in f2fs_lookup() can generate dirty data/meta while racing w/ readonly remount, it may leave dirty inode after filesystem becomes readonly, however, checkpoint() will skips flushing dirty inode in a state of readonly mode, result in above panic. Let's get rid of online repaire in f2fs_lookup(), and leave the work to fsck.f2fs. Fixes: 510022a85839 ("f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries") Reported-by: syzbot+ebea2790904673d7c618@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a7b20f061ff2d56a@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 -------- fs/f2fs/namei.c | 68 ------------------------------------------------- include/linux/f2fs_fs.h | 2 +- 3 files changed, 1 insertion(+), 80 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8d850192dfdf..0abb7bb39296 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -790,7 +790,6 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -3065,7 +3064,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); @@ -3189,8 +3187,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3211,8 +3207,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3293,11 +3287,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 38b4750475db..57d46e1439de 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -457,62 +457,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot_name, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -522,7 +466,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -558,17 +501,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f17f89a259e2..b0b821edfd97 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -278,7 +278,7 @@ struct node_footer { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ -- cgit v1.2.3 From bfe5c02654261bfb8bd9cb174a67f3279ea99e58 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Sep 2024 11:20:47 +0800 Subject: f2fs: fix to check atomic_file in f2fs ioctl interfaces Some f2fs ioctl interfaces like f2fs_ioc_set_pin_file(), f2fs_move_file_range(), and f2fs_defragment_range() missed to check atomic_write status, which may cause potential race issue, fix it. Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a8d153eb0a95..99903eafa7fe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2710,7 +2710,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, (range->start + range->len) >> PAGE_SHIFT, DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2943,6 +2944,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -3326,6 +3332,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); -- cgit v1.2.3 From 0638a3197c194bed837c157c3574685e36febc7b Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Thu, 5 Sep 2024 14:24:33 +0900 Subject: f2fs: avoid unused block when dio write in LFS mode This patch addresses the problem that when using LFS mode, unused blocks may occur in f2fs_map_blocks() during block allocation for dio writes. If a new section is allocated during block allocation, it will not be included in the map struct by map_is_mergeable() if the LBA of the allocated block is not contiguous. However, the block already allocated in this process will remain unused due to the LFS mode. This patch avoids the possibility of unused blocks by escaping f2fs_map_blocks() when allocating the last block in a section. Signed-off-by: Daejun Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c6d688208f8b..b94cf6eea2f9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1712,6 +1712,14 @@ skip: dn.ofs_in_node = end_offset; } + if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create) { + /* the next block to be allocated may not be contiguous. */ + if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == + CAP_BLKS_PER_SEC(sbi) - 1) + goto sync_out; + } + if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) -- cgit v1.2.3 From 5062b5bed4323275f2f89bc185c6a28d62cfcfd5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:40 -0700 Subject: f2fs: make BG GC more aggressive for zoned devices Since we don't have any GC on device side for zoned devices, need more aggressive BG GC. So, tune the parameters for that. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 20 ++++++++++++++++++-- fs/f2fs/gc.c | 25 +++++++++++++++++++++---- fs/f2fs/gc.h | 21 +++++++++++++++++++++ fs/f2fs/super.c | 5 +++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0abb7bb39296..5031e7fa287a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2879,13 +2879,26 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) return false; } +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + if (sbi->gc_mode == GC_URGENT_HIGH) return true; - if (is_inflight_io(sbi, type)) - return false; + if (zoned_gc) { + if (is_inflight_read_io(sbi)) + return false; + } else { + if (is_inflight_io(sbi, type)) + return false; + } if (sbi->gc_mode == GC_URGENT_MID) return true; @@ -2894,6 +2907,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) (type == DISCARD_TIME || type == GC_TIME)) return true; + if (zoned_gc) + return true; + return f2fs_time_over(sbi, type); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 724bbcb447d3..46e3bc26b78a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -116,7 +116,17 @@ static int gc_thread_func(void *data) goto next; } - if (has_enough_invalid_blocks(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { + if (has_enough_free_blocks(sbi, LIMIT_NO_ZONED_GC)) { + wait_ms = gc_th->no_gc_sleep_time; + f2fs_up_write(&sbi->gc_lock); + goto next; + } + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->max_sleep_time; + } + + if (need_to_boost_gc(sbi)) decrease_sleep_time(gc_th, &wait_ms); else increase_sleep_time(gc_th, &wait_ms); @@ -179,9 +189,16 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + + if (f2fs_sb_has_blkzoned(sbi)) { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + } else { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + } gc_th->gc_wake = false; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a8ea3301b815..55c4ba73362e 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -15,6 +15,11 @@ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +/* GC sleep parameters for zoned deivces */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10 +#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000 + /* choose candidates from sections which has age of more than 7 days */ #define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7) #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ @@ -25,6 +30,9 @@ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ +#define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ + #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX @@ -152,6 +160,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, *wait -= min_time; } +static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi, + unsigned int limit_perc) +{ + return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100); +} + static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { block_t user_block_count = sbi->user_block_count; @@ -167,3 +181,10 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) free_user_blocks(sbi) < limit_free_user_blocks(invalid_user_blocks)); } + +static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) +{ + if (f2fs_sb_has_blkzoned(sbi)) + return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return has_enough_invalid_blocks(sbi); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1c9316788a16..69aee4c3d97f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -706,6 +706,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (!strcmp(name, "off")) { + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "zoned devices need bggc"); + kfree(name); + return -EINVAL; + } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; -- cgit v1.2.3 From 8c890c4c60342719526520133fb1b6f69f196ab8 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:41 -0700 Subject: f2fs: introduce migration_window_granularity We can control the scanning window granularity for GC migration. For more frequent scanning and GC on zoned devices, we need a fine grained control knob for it. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/gc.c | 31 +++++++++++++++++++++---------- fs/f2fs/gc.h | 1 + fs/f2fs/super.c | 2 ++ fs/f2fs/sysfs.c | 7 +++++++ 6 files changed, 41 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index ca61ffcae126..e6fa55037daf 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -783,3 +783,11 @@ Description: The zone UFS we are currently using consists of two parts: blkzone_alloc_policy = 1 Only allow writing to sequential zones blkzone_alloc_policy = 2 Prioritize writing to conventional zones ======================== ========================================= + +What: /sys/fs/f2fs//migration_window_granularity +Date: September 2024 +Contact: "Daeho Jeong" +Description: Controls migration window granularity of garbage collection on large + section. it can control the scanning window granularity for GC migration + in a unit of segment, while migration_granularity controls the number + of segments which can be migrated at the same turn. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5031e7fa287a..635318af9c43 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1696,6 +1696,8 @@ struct f2fs_sb_info { unsigned int max_victim_search; /* migration granularity of garbage collection, unit: segment */ unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 46e3bc26b78a..5cd316d2102d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1708,24 +1708,34 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); + unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; int submitted = 0; - if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); + if (__is_large_section(sbi)) { + sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); - /* - * zone-capacity can be less than zone-size in zoned devices, - * resulting in less than expected usable segments in the zone, - * calculate the end segno in the zone which can be garbage collected - */ - if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= SEGS_PER_SEC(sbi) - + /* + * zone-capacity can be less than zone-size in zoned devices, + * resulting in less than expected usable segments in the zone, + * calculate the end segno in the zone which can be garbage + * collected + */ + if (f2fs_sb_has_blkzoned(sbi)) + sec_end_segno -= SEGS_PER_SEC(sbi) - f2fs_usable_segs_in_sec(sbi, segno); + if (gc_type == BG_GC) + end_segno = start_segno + + sbi->migration_window_granularity; + + if (end_segno > sec_end_segno) + end_segno = sec_end_segno; + } + sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); /* readahead multi ssa blocks those have contiguous address */ @@ -1803,7 +1813,8 @@ freed: if (__is_large_section(sbi)) sbi->next_victim_seg[gc_type] = - (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; + (segno + 1 < sec_end_segno) ? + segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 55c4ba73362e..245f93663745 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -32,6 +32,7 @@ #define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ #define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ +#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 69aee4c3d97f..e68f9c38af7d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3807,6 +3807,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->migration_granularity = SEGS_PER_SEC(sbi); + sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ? + DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 654a541dcc45..02b35edc9e1d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -564,6 +564,11 @@ out: return -EINVAL; } + if (!strcmp(a->attr.name, "migration_window_granularity")) { + if (t == 0 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + } + if (!strcmp(a->attr.name, "gc_urgent")) { if (t == 0) { sbi->gc_mode = GC_NORMAL; @@ -1013,6 +1018,7 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity); F2FS_SBI_GENERAL_RW_ATTR(dir_level); #ifdef CONFIG_F2FS_IOSTAT F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); @@ -1154,6 +1160,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), + ATTR_LIST(migration_window_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), -- cgit v1.2.3 From 4cdca5a904b1c72a0ba6ac51a121351128c02fd8 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:42 -0700 Subject: f2fs: add reserved_segments sysfs node For the fine tuning of GC behavior, add reserved_segments sysfs node. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/sysfs.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index e6fa55037daf..6527a14082d2 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -791,3 +791,9 @@ Description: Controls migration window granularity of garbage collection on larg section. it can control the scanning window granularity for GC migration in a unit of segment, while migration_granularity controls the number of segments which can be migrated at the same turn. + +What: /sys/fs/f2fs//reserved_segments +Date: September 2024 +Contact: "Daeho Jeong" +Description: In order to fine tune GC behavior, we can control the number of + reserved segments. diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 02b35edc9e1d..8eedba8ae710 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -986,6 +986,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); +SM_INFO_GENERAL_RW_ATTR(reserved_segments); /* DCC_INFO ATTR */ DCC_INFO_RW_ATTR(max_small_discards, max_discards); @@ -1158,6 +1159,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), + ATTR_LIST(reserved_segments), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), ATTR_LIST(migration_window_granularity), -- cgit v1.2.3 From 2223fe652f759649ae1d520e47e5f06727c0acbd Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:43 -0700 Subject: f2fs: increase BG GC migration window granularity when boosted for zoned devices Need bigger BG GC migration window granularity when free section is running low. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 12 ++++++++++-- fs/f2fs/gc.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5cd316d2102d..9a3d3994cf2b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1728,10 +1728,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sec_end_segno -= SEGS_PER_SEC(sbi) - f2fs_usable_segs_in_sec(sbi, segno); - if (gc_type == BG_GC) - end_segno = start_segno + + if (gc_type == BG_GC) { + unsigned int window_granularity = sbi->migration_window_granularity; + if (f2fs_sb_has_blkzoned(sbi) && + !has_enough_free_blocks(sbi, + LIMIT_BOOST_ZONED_GC)) + window_granularity *= BOOST_GC_MULTIPLE; + + end_segno = start_segno + window_granularity; + } + if (end_segno > sec_end_segno) end_segno = sec_end_segno; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 245f93663745..78abeebd68b5 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -33,6 +33,7 @@ #define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ #define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ #define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 +#define BOOST_GC_MULTIPLE 5 #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX -- cgit v1.2.3 From 9748c2ddea4a3f46a498bff4cf2bf9a5629e3f8b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:44 -0700 Subject: f2fs: do FG_GC when GC boosting is required for zoned devices Under low free section count, we need to use FG_GC instead of BG_GC to recover free sections. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 635318af9c43..3e4371d51042 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1300,6 +1300,7 @@ struct f2fs_gc_control { bool no_bg_gc; /* check the space and stop bg_gc */ bool should_migrate_blocks; /* should migrate blocks */ bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ unsigned int nr_free_secs; /* # of free sections to do GC */ }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9a3d3994cf2b..a59fec64eccf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -81,6 +81,8 @@ static int gc_thread_func(void *data) continue; } + gc_control.one_time = false; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -126,15 +128,19 @@ static int gc_thread_func(void *data) wait_ms = gc_th->max_sleep_time; } - if (need_to_boost_gc(sbi)) + if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); - else + if (f2fs_sb_has_blkzoned(sbi)) + gc_control.one_time = true; + } else { increase_sleep_time(gc_th, &wait_ms); + } do_gc: stat_inc_gc_call_count(sbi, foreground ? FOREGROUND : BACKGROUND); - sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || + gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground) @@ -1701,7 +1707,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, struct gc_inode_list *gc_list, int gc_type, - bool force_migrate) + bool force_migrate, bool one_time) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -1728,7 +1734,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sec_end_segno -= SEGS_PER_SEC(sbi) - f2fs_usable_segs_in_sec(sbi, segno); - if (gc_type == BG_GC) { + if (gc_type == BG_GC || one_time) { unsigned int window_granularity = sbi->migration_window_granularity; @@ -1911,7 +1917,8 @@ retry: } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, - gc_control->should_migrate_blocks); + gc_control->should_migrate_blocks, + gc_control->one_time); if (seg_freed < 0) goto stop; @@ -1922,6 +1929,9 @@ retry: total_sec_freed++; } + if (gc_control->one_time) + goto stop; + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -2047,7 +2057,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, }; do_garbage_collect(sbi, segno, &gc_list, FG_GC, - dry_run_sections == 0); + dry_run_sections == 0, false); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) -- cgit v1.2.3 From 9a481a1c16f4653c3414d996c3603eb42926e28b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:45 -0700 Subject: f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent Added control knobs for gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 14 ++++++++++++++ fs/f2fs/gc.c | 12 +++++++++--- fs/f2fs/gc.h | 4 ++++ fs/f2fs/sysfs.c | 4 ++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 6527a14082d2..0be1a53925c4 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -797,3 +797,17 @@ Date: September 2024 Contact: "Daeho Jeong" Description: In order to fine tune GC behavior, we can control the number of reserved segments. + +What: /sys/fs/f2fs//gc_no_zoned_gc_percent +Date: September 2024 +Contact: "Daeho Jeong" +Description: If the percentage of free sections over total sections is above this + number, F2FS do not garbage collection for zoned devices through the + background GC thread. the default number is "60". + +What: /sys/fs/f2fs//gc_boost_zoned_gc_percent +Date: September 2024 +Contact: "Daeho Jeong" +Description: If the percentage of free sections over total sections is under this + number, F2FS boosts garbage collection for zoned devices through the + background GC thread. the default number is "25". diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a59fec64eccf..319b4b80ba72 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -119,7 +119,8 @@ static int gc_thread_func(void *data) } if (f2fs_sb_has_blkzoned(sbi)) { - if (has_enough_free_blocks(sbi, LIMIT_NO_ZONED_GC)) { + if (has_enough_free_blocks(sbi, + gc_th->no_zoned_gc_percent)) { wait_ms = gc_th->no_gc_sleep_time; f2fs_up_write(&sbi->gc_lock); goto next; @@ -200,10 +201,14 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC; + gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC; } else { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->no_zoned_gc_percent = 0; + gc_th->boost_zoned_gc_percent = 0; } gc_th->gc_wake = false; @@ -1740,8 +1745,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi) && !has_enough_free_blocks(sbi, - LIMIT_BOOST_ZONED_GC)) - window_granularity *= BOOST_GC_MULTIPLE; + sbi->gc_thread->boost_zoned_gc_percent)) + window_granularity *= + BOOST_GC_MULTIPLE; end_segno = start_segno + window_granularity; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 78abeebd68b5..c063310c8dde 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -61,6 +61,10 @@ struct f2fs_gc_kthread { * caller of f2fs_balance_fs() * will wait on this wait queue. */ + + /* for gc control for zoned devices */ + unsigned int no_zoned_gc_percent; + unsigned int boost_zoned_gc_percent; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8eedba8ae710..a65c356a2769 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -977,6 +977,8 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1137,6 +1139,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_no_zoned_gc_percent), + ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), -- cgit v1.2.3 From e791d00bd06cb2d3a10afa43e57f6364931d0ada Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 9 Sep 2024 15:19:46 -0700 Subject: f2fs: add valid block ratio not to do excessive GC for one time GC We need to introduce a valid block ratio threshold not to trigger excessive GC for zoned deivces. The initial value of it is 95%. So, F2FS will stop the thread from intiating GC for sections having valid blocks exceeding the ratio. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++ fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 16 ++++++++++++---- fs/f2fs/gc.h | 2 ++ fs/f2fs/segment.c | 6 ++++-- fs/f2fs/segment.h | 1 + fs/f2fs/sysfs.c | 2 ++ 7 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 0be1a53925c4..fdedf1ea944b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -811,3 +811,11 @@ Contact: "Daeho Jeong" Description: If the percentage of free sections over total sections is under this number, F2FS boosts garbage collection for zoned devices through the background GC thread. the default number is "25". + +What: /sys/fs/f2fs//gc_valid_thresh_ratio +Date: September 2024 +Contact: "Daeho Jeong" +Description: It controls the valid block ratio threshold not to trigger excessive GC + for zoned deivces. The initial value of it is 95(%). F2FS will stop the + background GC thread from intiating GC for sections having valid blocks + exceeding the ratio. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e4371d51042..9a226d36a36c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3922,7 +3922,7 @@ void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age); + unsigned long long age, bool one_time); /* * recovery.c diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 319b4b80ba72..d3e3104c4492 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -196,6 +196,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; + gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; @@ -396,6 +397,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / + 100)) + return UINT_MAX; + /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) return get_valid_blocks(sbi, segno, true); @@ -770,7 +776,7 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age) + unsigned long long age, bool one_time) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); @@ -787,6 +793,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; + p.one_time_gc = one_time; retry: select_policy(sbi, gc_type, type, &p); @@ -1698,13 +1705,14 @@ next_step: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type) + int gc_type, bool one_time) { struct sit_info *sit_i = SIT_I(sbi); int ret; down_write(&sit_i->sentry_lock); - ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, + LFS, 0, one_time); up_write(&sit_i->sentry_lock); return ret; } @@ -1911,7 +1919,7 @@ gc_more: goto stop; } retry: - ret = __get_victim(sbi, &segno, gc_type); + ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ if (ret == -ENODATA && gc_type == FG_GC && diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c063310c8dde..2914b678bf8f 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -25,6 +25,7 @@ #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ @@ -65,6 +66,7 @@ struct f2fs_gc_kthread { /* for gc control for zoned devices */ unsigned int no_zoned_gc_percent; unsigned int boost_zoned_gc_percent; + unsigned int valid_thresh_ratio; }; struct gc_inode_list { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5ec0d9f655f8..112d58d566d4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3090,7 +3090,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3117,7 +3118,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bfc01a521cb9..43db2d3e8c85 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -188,6 +188,7 @@ struct victim_sel_policy { unsigned int min_segno; /* segment # having min. cost */ unsigned long long age; /* mtime of GCed section*/ unsigned long long age_threshold;/* age threshold */ + bool one_time_gc; /* one time GC */ }; struct seg_entry { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a65c356a2769..d18931b7b094 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -979,6 +979,7 @@ GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1141,6 +1142,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_no_zoned_gc_percent), ATTR_LIST(gc_boost_zoned_gc_percent), + ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), -- cgit v1.2.3 From 930c6ab93492c4b15436524e704950b364b2930c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Sep 2024 11:07:13 +0800 Subject: f2fs: fix to don't set SB_RDONLY in f2fs_handle_critical_error() syzbot reports a f2fs bug as below: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 58 at kernel/rcu/sync.c:177 rcu_sync_dtor+0xcd/0x180 kernel/rcu/sync.c:177 CPU: 1 UID: 0 PID: 58 Comm: kworker/1:2 Not tainted 6.10.0-syzkaller-12562-g1722389b0d86 #0 Workqueue: events destroy_super_work RIP: 0010:rcu_sync_dtor+0xcd/0x180 kernel/rcu/sync.c:177 Call Trace: percpu_free_rwsem+0x41/0x80 kernel/locking/percpu-rwsem.c:42 destroy_super_work+0xec/0x130 fs/super.c:282 process_one_work kernel/workqueue.c:3231 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 worker_thread+0x86d/0xd40 kernel/workqueue.c:3390 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 As Christian Brauner pointed out [1]: the root cause is f2fs sets SB_RDONLY flag in internal function, rather than setting the flag covered w/ sb->s_umount semaphore via remount procedure, then below race condition causes this bug: - freeze_super() - sb_wait_write(sb, SB_FREEZE_WRITE) - sb_wait_write(sb, SB_FREEZE_PAGEFAULT) - sb_wait_write(sb, SB_FREEZE_FS) - f2fs_handle_critical_error - sb->s_flags |= SB_RDONLY - thaw_super - thaw_super_locked - sb_rdonly() is true, so it skips sb_freeze_unlock(sb, SB_FREEZE_FS) - deactivate_locked_super Since f2fs has almost the same logic as ext4 [2] when handling critical error in filesystem if it mounts w/ errors=remount-ro option: - set CP_ERROR_FLAG flag which indicates filesystem is stopped - record errors to superblock - set SB_RDONLY falg Once we set CP_ERROR_FLAG flag, all writable interfaces can detect the flag and stop any further updates on filesystem. So, it is safe to not set SB_RDONLY flag, let's remove the logic and keep in line w/ ext4 [3]. [1] https://lore.kernel.org/all/20240729-himbeeren-funknetz-96e62f9c7aee@brauner [2] https://lore.kernel.org/all/20240729132721.hxih6ehigadqf7wx@quack3 [3] https://lore.kernel.org/linux-ext4/20240805201241.27286-1-jack@suse.cz Fixes: b62e71be2110 ("f2fs: support errors=remount-ro|continue|panic mountoption") Reported-by: syzbot+20d7e439f76bbbd863a7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000b90a8e061e21d12f@google.com/ Cc: Jan Kara Cc: Christian Brauner Signed-off-by: Chao Yu Reviewed-by: Christian Brauner Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e68f9c38af7d..fc2c586c7619 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4199,12 +4199,14 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) -- cgit v1.2.3 From 65a6ce4726c27b45600303f06496fef46d00b57f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Sep 2024 09:16:19 +0800 Subject: f2fs: fix to don't panic system for no free segment fault injection f2fs: fix to don't panic system for no free segment fault injection syzbot reports a f2fs bug as below: F2FS-fs (loop0): inject no free segment in get_new_segment of __allocate_new_segment+0x1ce/0x940 fs/f2fs/segment.c:3167 F2FS-fs (loop0): Stopped filesystem due to reason: 7 ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:2748! CPU: 0 UID: 0 PID: 5109 Comm: syz-executor304 Not tainted 6.11.0-rc6-syzkaller-00363-g89f5e14d05b4 #0 RIP: 0010:get_new_segment fs/f2fs/segment.c:2748 [inline] RIP: 0010:new_curseg+0x1f61/0x1f70 fs/f2fs/segment.c:2836 Call Trace: __allocate_new_segment+0x1ce/0x940 fs/f2fs/segment.c:3167 f2fs_allocate_new_section fs/f2fs/segment.c:3181 [inline] f2fs_allocate_pinning_section+0xfa/0x4e0 fs/f2fs/segment.c:3195 f2fs_expand_inode_data+0x5d6/0xbb0 fs/f2fs/file.c:1799 f2fs_fallocate+0x448/0x960 fs/f2fs/file.c:1903 vfs_fallocate+0x553/0x6c0 fs/open.c:334 do_vfs_ioctl+0x2592/0x2e50 fs/ioctl.c:886 __do_sys_ioctl fs/ioctl.c:905 [inline] __se_sys_ioctl+0x81/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:get_new_segment fs/f2fs/segment.c:2748 [inline] RIP: 0010:new_curseg+0x1f61/0x1f70 fs/f2fs/segment.c:2836 The root cause is when we inject no free segment fault into f2fs, we should not panic system, fix it. Fixes: 8b10d3653735 ("f2fs: introduce FAULT_NO_SEGMENT") Reported-by: syzbot+341e5f32ebafbb46b81c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000f0ee5b0621ab694b@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 112d58d566d4..b7c1ca51ea32 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2730,6 +2730,7 @@ find_other_zone: MAIN_SECS(sbi)); if (secno >= MAIN_SECS(sbi)) { ret = -ENOSPC; + f2fs_bug_on(sbi, 1); goto out_unlock; } } @@ -2740,6 +2741,7 @@ find_other_zone: MAIN_SECS(sbi)); if (secno >= MAIN_SECS(sbi)) { ret = -ENOSPC; + f2fs_bug_on(sbi, 1); goto out_unlock; } } @@ -2781,10 +2783,8 @@ got_it: out_unlock: spin_unlock(&free_i->segmap_lock); - if (ret == -ENOSPC) { + if (ret == -ENOSPC) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); - f2fs_bug_on(sbi, 1); - } return ret; } -- cgit v1.2.3 From 2af583afcf9d54b988b68d1033101e73edd72a9b Mon Sep 17 00:00:00 2001 From: liuderong Date: Wed, 11 Sep 2024 10:40:20 +0800 Subject: f2fs: remove unused parameters Remove unused parameter segno from f2fs_usable_segs_in_sec. Signed-off-by: liuderong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/gc.c | 6 +++--- fs/f2fs/segment.c | 3 +-- fs/f2fs/segment.h | 4 ++-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a226d36a36c..33f5449dc22d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3780,8 +3780,7 @@ void f2fs_destroy_segment_manager_caches(void); int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp); -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d3e3104c4492..2903c4c4a76c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -368,7 +368,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) unsigned char age = 0; unsigned char u; unsigned int i; - unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); for (i = 0; i < usable_segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; @@ -1745,7 +1745,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, */ if (f2fs_sb_has_blkzoned(sbi)) sec_end_segno -= SEGS_PER_SEC(sbi) - - f2fs_usable_segs_in_sec(sbi, segno); + f2fs_usable_segs_in_sec(sbi); if (gc_type == BG_GC || one_time) { unsigned int window_granularity = @@ -1938,7 +1938,7 @@ retry: total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { + if (seg_freed == f2fs_usable_segs_in_sec(sbi)) { sec_freed++; total_sec_freed++; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b7c1ca51ea32..1766254279d2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5422,8 +5422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, return BLKS_PER_SEG(sbi); } -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 43db2d3e8c85..71adb4a43bec 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -431,7 +431,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -465,7 +465,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { -- cgit v1.2.3 From 5cc69a27abfa91abbb39fc584f82d6c867b60f47 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 12 Sep 2024 09:59:58 -0700 Subject: f2fs: forcibly migrate to secure space for zoned device file pinning We need to migrate data blocks even though it is full to secure space for zoned device file pinning. Fixes: 9703d69d9d15 ("f2fs: support file pinning for zoned devices") Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2903c4c4a76c..9322a7200e31 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2070,8 +2070,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC, - dry_run_sections == 0, false); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) -- cgit v1.2.3 From ae87b9c2dc9800e6ab52febd09341140599ff8e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 14 Sep 2024 21:44:10 +0000 Subject: f2fs: allow F2FS_IPU_NOCACHE for pinned file This patch allows f2fs to submit bios of in-place writes on pinned file. Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d18931b7b094..c56e8c873935 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -792,7 +792,8 @@ out: if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; - if (t && f2fs_lfs_mode(sbi)) + /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */ + if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE))) return -EINVAL; SM_I(sbi)->ipu_policy = (unsigned int)t; return count; -- cgit v1.2.3