diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-fs-f2fs | 56 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 17 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 63 | ||||
-rw-r--r-- | fs/f2fs/data.c | 164 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 2 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 8 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 4 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 148 | ||||
-rw-r--r-- | fs/f2fs/file.c | 199 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 113 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 29 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 31 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 9 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 68 | ||||
-rw-r--r-- | fs/f2fs/node.c | 46 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 72 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 5 | ||||
-rw-r--r-- | fs/f2fs/super.c | 119 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 82 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 5 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 14 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 4 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 3 |
23 files changed, 798 insertions, 463 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index cad6c3dc1f9c..fdedf1ea944b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -579,6 +579,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young candidates whose age is not beyond the threshold, by default it was initialized as 604800 seconds (equals to 7 days). +What: /sys/fs/f2fs/<disk>/atgc_enabled +Date: Feb 2024 +Contact: "Jinbao Liu" <liujinbao1@xiaomi.com> +Description: It represents whether ATGC is on or off. The value is 1 which + indicates that ATGC is on, and 0 indicates that it is off. + What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments Date: July 2021 Contact: "Daeho Jeong" <daehojeong@google.com> @@ -763,3 +769,53 @@ Date: November 2023 Contact: "Chao Yu" <chao@kernel.org> Description: It controls to enable/disable IO aware feature for background discard. By default, the value is 1 which indicates IO aware is on. + +What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy +Date: July 2024 +Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com> +Description: The zone UFS we are currently using consists of two parts: + conventional zones and sequential zones. It can be used to control which part + to prioritize for writes, with a default value of 0. + + ======================== ========================================= + value description + blkzone_alloc_policy = 0 Prioritize writing to sequential zones + blkzone_alloc_policy = 1 Only allow writing to sequential zones + blkzone_alloc_policy = 2 Prioritize writing to conventional zones + ======================== ========================================= + +What: /sys/fs/f2fs/<disk>/migration_window_granularity +Date: September 2024 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: Controls migration window granularity of garbage collection on large + section. it can control the scanning window granularity for GC migration + in a unit of segment, while migration_granularity controls the number + of segments which can be migrated at the same turn. + +What: /sys/fs/f2fs/<disk>/reserved_segments +Date: September 2024 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: In order to fine tune GC behavior, we can control the number of + reserved segments. + +What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent +Date: September 2024 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: If the percentage of free sections over total sections is above this + number, F2FS do not garbage collection for zoned devices through the + background GC thread. the default number is "60". + +What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent +Date: September 2024 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: If the percentage of free sections over total sections is under this + number, F2FS boosts garbage collection for zoned devices through the + background GC thread. the default number is "25". + +What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio +Date: September 2024 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: It controls the valid block ratio threshold not to trigger excessive GC + for zoned deivces. The initial value of it is 95(%). F2FS will stop the + background GC thread from intiating GC for sections having valid blocks + exceeding the ratio. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bdd96329dddd..7f76460b721f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -99,7 +99,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page->index, META); + f2fs_handle_page_eio(sbi, page_folio(page), META); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -345,30 +345,31 @@ static int __f2fs_write_meta_page(struct page *page, enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); - trace_f2fs_writepage(page_folio(page), META); + trace_f2fs_writepage(folio, META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); + folio_unlock(folio); return 0; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) + if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - f2fs_do_write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); @@ -1551,7 +1552,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + - (i << F2FS_BLKSIZE_BITS), blk + i); + F2FS_BLK_TO_BYTES(i), blk + i); } /* write out checkpoint buffer at block 0 */ diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 990b93689b46..7f26440e8595 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -90,11 +90,13 @@ bool f2fs_is_compressed_page(struct page *page) static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - attach_page_private(page, (void *)data); + struct folio *folio = page_folio(page); + + folio_attach_private(folio, (void *)data); /* i_crypto_info and iv index */ - page->index = index; - page->mapping = inode->i_mapping; + folio->index = index; + folio->mapping = inode->i_mapping; } static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) @@ -160,17 +162,17 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->cluster_idx = NULL_CLUSTER; } -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) { unsigned int cluster_ofs; - if (!f2fs_cluster_can_merge_page(cc, page->index)) + if (!f2fs_cluster_can_merge_page(cc, folio->index)) f2fs_bug_on(F2FS_I_SB(cc->inode), 1); - cluster_ofs = offset_in_cluster(cc, page->index); - cc->rpages[cluster_ofs] = page; + cluster_ofs = offset_in_cluster(cc, folio->index); + cc->rpages[cluster_ofs] = folio_page(folio, 0); cc->nr_rpages++; - cc->cluster_idx = cluster_idx(cc, page->index); + cc->cluster_idx = cluster_idx(cc, folio->index); } #ifdef CONFIG_F2FS_FS_LZO @@ -879,7 +881,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ - if (page->index >= nr_pages) + if (page_folio(page)->index >= nr_pages) return true; } return false; @@ -945,7 +947,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; int count, i; - for (i = 1, count = 1; i < cluster_size; i++) { + for (i = 0, count = 0; i < cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -956,8 +958,8 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, return count; } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr_blks) +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) { struct dnode_of_data dn; unsigned int start_idx = cluster_idx << @@ -978,10 +980,12 @@ static int __f2fs_cluster_blocks(struct inode *inode, } if (dn.data_blkaddr == COMPRESS_ADDR) { - if (compr_blks) - ret = __f2fs_get_cluster_blocks(inode, &dn); - else + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -991,7 +995,16 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } /* return whether cluster is compressed one or not */ @@ -999,7 +1012,16 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) @@ -1093,7 +1115,7 @@ retry: if (PageUptodate(page)) f2fs_put_page(page, 1); else - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); } if (!f2fs_cluster_is_empty(cc)) { @@ -1123,7 +1145,7 @@ retry: } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); if (!PageUptodate(page)) { release_and_retry: @@ -1523,7 +1545,8 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, + ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), + &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dfa0207ad8f..94f7b084f601 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/sched/mm.h> #include <linux/mpage.h> #include <linux/writeback.h> @@ -355,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page->index != nid_of_node(page)); + page_folio(page)->index != nid_of_node(page)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) @@ -704,7 +703,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio = __bio_alloc(fio, 1); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -803,7 +802,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, fio->page->mapping->host, - fio->page->index, fio) && + page_folio(fio->page)->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; @@ -903,7 +902,7 @@ alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { @@ -996,13 +995,13 @@ next: (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, - bio_page->index, fio))) + page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, - bio_page->index, fio, GFP_NOIO); + page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } @@ -1087,7 +1086,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static int f2fs_submit_page_read(struct inode *inode, struct page *page, +static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { @@ -1095,14 +1094,14 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) { iostat_update_and_unbind_ctx(bio); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); @@ -1270,7 +1269,7 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; @@ -1713,6 +1712,14 @@ skip: dn.ofs_in_node = end_offset; } + if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create) { + /* the next block to be allocated may not be contiguous. */ + if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == + CAP_BLKS_PER_SEC(sbi) - 1) + goto sync_out; + } + if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -1939,7 +1946,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; @@ -2064,7 +2071,7 @@ out: static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) - return inode->i_sb->s_maxbytes; + return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } @@ -2208,19 +2215,22 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; + struct folio *folio; if (!page) continue; - if ((sector_t)page->index >= last_block_in_file) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - } else if (!PageUptodate(page)) { + + folio = page_folio(page); + if ((sector_t)folio->index >= last_block_in_file) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + } else if (!folio_test_uptodate(folio)) { continue; } - unlock_page(page); + folio_unlock(folio); if (for_write) - put_page(page); + folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } @@ -2280,7 +2290,7 @@ skip_reading_dnode: } for (i = 0; i < cc->nr_cpages; i++) { - struct page *page = dic->cpages[i]; + struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; @@ -2290,7 +2300,8 @@ skip_reading_dnode: f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, page, blkaddr)) { + if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), + blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; @@ -2300,7 +2311,7 @@ skip_reading_dnode: if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; @@ -2309,7 +2320,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, f2fs_ra_op_flags(rac), - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); f2fs_decompress_end_io(dic, ret, true); @@ -2319,7 +2330,7 @@ submit_and_realloc: } } - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); @@ -2430,7 +2441,7 @@ static int f2fs_mpage_readpages(struct inode *inode, if (ret) goto set_error_page; - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); goto next_page; read_single_page: @@ -2645,21 +2656,24 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct page *page = fio->page; - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(fio->page); + struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + page_private_atomic(folio_page(folio, 0)); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_read_extent_cache_block(inode, page->index, + f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) @@ -2674,7 +2688,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; @@ -2682,8 +2696,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { - ClearPageUptodate(page); - clear_page_private_gcing(page); + folio_clear_uptodate(folio); + clear_page_private_gcing(folio_page(folio, 0)); goto out_writepage; } got_it: @@ -2709,7 +2723,7 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -2717,11 +2731,11 @@ got_it: if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - end_page_writeback(page); + folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(page_folio(page), IPU); + trace_f2fs_do_write_data_page(folio, IPU); return err; } @@ -2743,15 +2757,17 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page_folio(page), OPU); + trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); + if (atomic_commit) + clear_page_private_atomic(folio_page(folio, 0)); out_writepage: f2fs_put_dnode(&dn); out: @@ -2760,7 +2776,7 @@ out: return err; } -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, @@ -2768,12 +2784,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, int compr_blocks, bool allow_balance) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; + struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); @@ -2797,11 +2814,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .last_block = last_block, }; - trace_f2fs_writepage(page_folio(page), DATA); + trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); + mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages for keeping lastest * directory structure. @@ -2819,7 +2836,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || + if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; @@ -2829,10 +2846,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) + if ((folio->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { @@ -2862,7 +2879,7 @@ write: err = -EAGAIN; if (f2fs_has_inline_data(inode)) { - err = f2fs_write_inline_data(inode, page); + err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } @@ -2892,7 +2909,7 @@ done: out: inode_dec_dirty_pages(inode); if (err) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); clear_page_private_gcing(page); } @@ -2902,7 +2919,7 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); @@ -2920,7 +2937,7 @@ out: return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). @@ -2929,29 +2946,30 @@ redirty_out: */ if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); + folio_unlock(folio); return err; } static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); #ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) goto out; if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, page->index)) { - redirty_page_for_writepage(wbc, page); + if (f2fs_is_compressed_cluster(inode, folio->index)) { + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } } out: #endif - return f2fs_write_single_data_page(page, NULL, NULL, NULL, + return f2fs_write_single_data_page(folio, NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } @@ -3157,11 +3175,11 @@ continue_unlock: #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif - ret = f2fs_write_single_data_page(&folio->page, + ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) @@ -3369,11 +3387,11 @@ void f2fs_write_failed(struct inode *inode, loff_t to) } static int prepare_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { - struct inode *inode = page->mapping->host; - pgoff_t index = page->index; + struct inode *inode = folio->mapping->host; + pgoff_t index = folio->index; struct dnode_of_data dn; struct page *ipage; bool locked = false; @@ -3410,13 +3428,13 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page_folio(page), ipage); + f2fs_do_read_inline_data(folio, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); goto out; } - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } @@ -3509,12 +3527,12 @@ unlock_out: } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned int len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; - pgoff_t index = page->index; + pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; @@ -3620,10 +3638,10 @@ repeat: *foliop = folio; if (f2fs_is_atomic_file(inode)) - err = prepare_atomic_write_begin(sbi, &folio->page, pos, len, + err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else - err = prepare_write_begin(sbi, &folio->page, pos, len, + err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) goto put_folio; @@ -3648,7 +3666,7 @@ repeat: if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { - folio_zero_segment(folio, len, PAGE_SIZE); + folio_zero_segment(folio, len, folio_size(folio)); return 0; } @@ -3662,8 +3680,8 @@ repeat: goto put_folio; } err = f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, &folio->page, - blkaddr, 0, true); + F2FS_I(inode)->cow_inode : inode, + folio, blkaddr, 0, true); if (err) goto put_folio; @@ -3727,6 +3745,9 @@ static int f2fs_write_end(struct file *file, folio_mark_dirty(folio); + if (f2fs_is_atomic_file(inode)) + set_page_private_atomic(folio_page(folio, 0)); + if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { f2fs_i_size_write(inode, pos + copied); @@ -4117,9 +4138,8 @@ const struct address_space_operations f2fs_dblock_aops = { .swap_deactivate = f2fs_swap_deactivate, }; -void f2fs_clear_page_cache_dirty_tag(struct page *page) +void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { - struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; unsigned long flags; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8b0e1e71b667..546b8ba91261 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -275,7 +275,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks); si->base_mem += NM_I(sbi)->nat_blocks * f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cbd7a5e96a37..1136539a57a8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -166,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } @@ -841,6 +842,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + pgoff_t index = page_folio(page)->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -866,8 +868,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); if (bit_pos == NR_DENTRY_IN_BLOCK && - !f2fs_truncate_hole(dir, page->index, page->index + 1)) { - f2fs_clear_page_cache_dirty_tag(page); + !f2fs_truncate_hole(dir, index, index + 1)) { + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); ClearPageUptodate(page); clear_page_private_all(page); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index fd1fc06359ee..62ac440d9416 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; @@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ac19c61f0c3e..33f5449dc22d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -11,7 +11,6 @@ #include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> -#include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/magic.h> @@ -134,6 +133,12 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -285,6 +290,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -784,7 +790,6 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -802,6 +807,7 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ @@ -1155,6 +1161,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { @@ -1293,6 +1300,7 @@ struct f2fs_gc_control { bool no_bg_gc; /* check the space and stop bg_gc */ bool should_migrate_blocks; /* should migrate blocks */ bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ unsigned int nr_free_secs; /* # of free sections to do GC */ }; @@ -1418,7 +1426,8 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION * bit 2 PAGE_PRIVATE_INLINE_INODE * bit 3 PAGE_PRIVATE_REF_RESOURCE - * bit 4- f2fs private data + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE + * bit 5- f2fs private data * * Layout B: lowest bit should be 0 * page.private is a wrapped pointer. @@ -1428,6 +1437,7 @@ enum { PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -1559,6 +1569,8 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ @@ -1685,6 +1697,8 @@ struct f2fs_sb_info { unsigned int max_victim_search; /* migration granularity of garbage collection, unit: segment */ unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. @@ -1994,6 +2008,16 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % (1 << folio_order(folio)); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); @@ -2396,14 +2420,17 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); static inline unsigned long get_page_private_data(struct page *page) { @@ -2435,6 +2462,7 @@ static inline void clear_page_private_all(struct page *page) clear_page_private_reference(page); clear_page_private_gcing(page); clear_page_private_inline(page); + clear_page_private_atomic(page); f2fs_bug_on(F2FS_P_SB(page), page_private(page)); } @@ -2854,13 +2882,26 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) return false; } +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + if (sbi->gc_mode == GC_URGENT_HIGH) return true; - if (is_inflight_io(sbi, type)) - return false; + if (zoned_gc) { + if (is_inflight_read_io(sbi)) + return false; + } else { + if (is_inflight_io(sbi, type)) + return false; + } if (sbi->gc_mode == GC_URGENT_MID) return true; @@ -2869,6 +2910,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) (type == DISCARD_TIME || type == GC_TIME)) return true; + if (zoned_gc) + return true; + return f2fs_time_over(sbi, type); } @@ -2900,26 +2944,27 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) +static inline unsigned int get_dnode_base(struct inode *inode, + struct page *node_page) { - struct f2fs_node *raw_node; - __le32 *addr_array; - int base = 0; - bool is_inode = IS_INODE(node_page); + if (!IS_INODE(node_page)) + return 0; - raw_node = F2FS_NODE(node_page); + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_page)->i); +} - if (is_inode) { - if (!inode) - /* from GC path only */ - base = offset_in_addr(&raw_node->i); - else if (f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + return blkaddr_in_node(F2FS_NODE(node_page)) + + get_dnode_base(inode, node_page); +} - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[base + offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct page *node_page, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) @@ -3038,10 +3083,8 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: - case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -3163,8 +3206,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3185,8 +3226,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3267,11 +3306,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); @@ -3292,8 +3326,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { __le32 *addr = get_dnode_addr(inode, page); @@ -3432,17 +3464,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page) -{ - int base = 0; - - if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - - return blkaddr_in_node(F2FS_NODE(node_page)) + base; -} - #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -3495,7 +3516,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly); + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, @@ -3719,7 +3740,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type); void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); void f2fs_outplace_write_data(struct dnode_of_data *dn, @@ -3759,8 +3780,7 @@ void f2fs_destroy_segment_manager_caches(void); int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp); -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); @@ -3868,7 +3888,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, @@ -3877,7 +3897,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); -void f2fs_clear_page_cache_dirty_tag(struct page *page); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); @@ -3901,7 +3921,7 @@ void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age); + unsigned long long age, bool one_time); /* * recovery.c @@ -3987,7 +4007,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_call_count(sbi, foreground) \ atomic_inc(&sbi->cp_call_count[(foreground)]) -#define stat_inc_cp_count(si) (F2FS_STAT(sbi)->cp_count++) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4172,7 +4192,7 @@ int f2fs_read_inline_data(struct inode *inode, struct folio *folio); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); -int f2fs_write_inline_data(struct inode *inode, struct page *page); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, @@ -4289,6 +4309,11 @@ static inline bool f2fs_meta_inode_gc_required(struct inode *inode) * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; bool f2fs_is_compressed_page(struct page *page); struct page *f2fs_compress_control_page(struct page *page); int f2fs_prepare_compress_overwrite(struct inode *inode, @@ -4309,12 +4334,13 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); int f2fs_write_multi_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); @@ -4401,6 +4427,12 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, @@ -4653,9 +4685,11 @@ static inline void f2fs_io_schedule_timeout(long timeout) io_schedule_timeout(timeout); } -static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, - enum page_type type) +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) { + pgoff_t ofs = folio->index; + if (unlikely(f2fs_cp_error(sbi))) return; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 903337f8d21a..9ae54c4c72fe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -8,7 +8,6 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/stat.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/falloc.h> @@ -54,7 +53,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -86,7 +85,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - int ret = f2fs_is_compressed_cluster(inode, page->index); + int ret = f2fs_is_compressed_cluster(inode, folio->index); if (ret < 0) { err = ret; @@ -106,11 +105,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page))) { - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || + !folio_test_uptodate(folio))) { + folio_unlock(folio); err = -EFAULT; goto out_sem; } @@ -118,9 +117,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - err = f2fs_get_block_locked(&dn, page->index); + err = f2fs_get_block_locked(&dn, folio->index); } else { - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); f2fs_put_dnode(&dn); if (f2fs_is_pinned_file(inode) && !__is_valid_data_blkaddr(dn.data_blkaddr)) @@ -128,11 +127,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } if (err) { - unlock_page(page); + folio_unlock(folio); goto out_sem; } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -140,18 +139,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto out_sem; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_SHIFT) > + if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { loff_t offset; offset = i_size_read(inode) & ~PAGE_MASK; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); } - set_page_dirty(page); + folio_mark_dirty(folio); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); @@ -163,7 +162,7 @@ out_sem: out: ret = vmf_fs_error(err); - trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret); + trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); return ret; } @@ -218,6 +217,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } @@ -373,8 +375,7 @@ sync_nodes: f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) || - (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi))) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); @@ -431,7 +432,7 @@ static bool __found_offset(struct address_space *mapping, static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); struct dnode_of_data dn; pgoff_t pgofs, end_offset; loff_t data_ofs = offset; @@ -513,10 +514,7 @@ fail: static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; - - if (f2fs_compressed_file(inode)) - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); switch (whence) { case SEEK_SET: @@ -1052,6 +1050,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -1888,6 +1893,12 @@ static long f2fs_fallocate(struct file *file, int mode, if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; @@ -2116,10 +2127,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2149,6 +2162,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[READ]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2158,27 +2172,18 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); + if (ret) + goto out_unlock; set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); @@ -2187,11 +2192,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; } f2fs_write_inode(inode, NULL); @@ -2210,7 +2217,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) } f2fs_i_size_write(fi->cow_inode, isize); +out_unlock: + f2fs_up_write(&fi->i_gc_rwsem[READ]); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (ret) + goto out; f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; @@ -2228,6 +2239,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2260,6 +2274,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2279,7 +2296,7 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) } int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly) + bool readonly, bool need_lock) { struct super_block *sb = sbi->sb; int ret = 0; @@ -2326,12 +2343,19 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, if (readonly) goto out; + /* grab sb->s_umount to avoid racing w/ remount() */ + if (need_lock) + down_read(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_read(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: @@ -2368,7 +2392,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } } - ret = f2fs_do_shutdown(sbi, in, readonly); + ret = f2fs_do_shutdown(sbi, in, readonly, true); if (need_drop) mnt_drop_write_file(filp); @@ -2686,7 +2710,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, (range->start + range->len) >> PAGE_SHIFT, DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2710,7 +2735,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } @@ -2793,6 +2818,8 @@ do_map: goto clear_out; } + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -2917,6 +2944,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -2968,9 +3000,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, - pos_out >> F2FS_BLKSIZE_BITS, - len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), + F2FS_BYTES_TO_BLK(pos_out), + F2FS_BYTES_TO_BLK(len), false); if (!ret) { if (dst_max_i_size) @@ -3300,6 +3332,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); @@ -4193,6 +4230,8 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) /* It will never fail, when page has pinned above */ f2fs_bug_on(F2FS_I_SB(inode), !page); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -4207,9 +4246,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4244,10 +4282,15 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4257,9 +4300,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4286,9 +4326,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4322,10 +4362,15 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; + + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4335,9 +4380,6 @@ static int f2fs_ioc_compress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4538,6 +4580,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_down_read(&fi->i_gc_rwsem[READ]); } + /* dio is not compatible w/ atomic file */ + if (f2fs_is_atomic_file(inode)) { + f2fs_up_read(&fi->i_gc_rwsem[READ]); + ret = -EOPNOTSUPP; + goto out; + } + /* * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of * the higher-level function iomap_dio_rw() in order to ensure that the @@ -4597,6 +4646,10 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode))) + inode_dio_wait(inode); + if (f2fs_should_use_dio(inode, iocb, to)) { ret = f2fs_dio_read_iter(iocb, to); } else { @@ -4949,6 +5002,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Determine whether we will do a direct write or a buffered write. */ dio = f2fs_should_use_dio(inode, iocb, from); + /* dio is not compatible w/ atomic write */ + if (dio && f2fs_is_atomic_file(inode)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); preallocated = f2fs_preallocate_blocks(iocb, from, dio); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 724bbcb447d3..9322a7200e31 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -81,6 +81,8 @@ static int gc_thread_func(void *data) continue; } + gc_control.one_time = false; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -116,15 +118,30 @@ static int gc_thread_func(void *data) goto next; } - if (has_enough_invalid_blocks(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { + if (has_enough_free_blocks(sbi, + gc_th->no_zoned_gc_percent)) { + wait_ms = gc_th->no_gc_sleep_time; + f2fs_up_write(&sbi->gc_lock); + goto next; + } + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->max_sleep_time; + } + + if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); - else + if (f2fs_sb_has_blkzoned(sbi)) + gc_control.one_time = true; + } else { increase_sleep_time(gc_th, &wait_ms); + } do_gc: stat_inc_gc_call_count(sbi, foreground ? FOREGROUND : BACKGROUND); - sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || + gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground) @@ -179,9 +196,21 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + + if (f2fs_sb_has_blkzoned(sbi)) { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC; + gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC; + } else { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->no_zoned_gc_percent = 0; + gc_th->boost_zoned_gc_percent = 0; + } gc_th->gc_wake = false; @@ -339,7 +368,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) unsigned char age = 0; unsigned char u; unsigned int i; - unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); for (i = 0; i < usable_segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; @@ -368,6 +397,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / + 100)) + return UINT_MAX; + /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) return get_valid_blocks(sbi, segno, true); @@ -742,7 +776,7 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age) + unsigned long long age, bool one_time) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); @@ -759,6 +793,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; + p.one_time_gc = one_time; retry: select_policy(sbi, gc_type, type, &p); @@ -1670,13 +1705,14 @@ next_step: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type) + int gc_type, bool one_time) { struct sit_info *sit_i = SIT_I(sbi); int ret; down_write(&sit_i->sentry_lock); - ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, + LFS, 0, one_time); up_write(&sit_i->sentry_lock); return ret; } @@ -1684,30 +1720,49 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, struct gc_inode_list *gc_list, int gc_type, - bool force_migrate) + bool force_migrate, bool one_time) { struct page *sum_page; struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); + unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; int submitted = 0; - if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); + if (__is_large_section(sbi)) { + sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); - /* - * zone-capacity can be less than zone-size in zoned devices, - * resulting in less than expected usable segments in the zone, - * calculate the end segno in the zone which can be garbage collected - */ - if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= SEGS_PER_SEC(sbi) - - f2fs_usable_segs_in_sec(sbi, segno); + /* + * zone-capacity can be less than zone-size in zoned devices, + * resulting in less than expected usable segments in the zone, + * calculate the end segno in the zone which can be garbage + * collected + */ + if (f2fs_sb_has_blkzoned(sbi)) + sec_end_segno -= SEGS_PER_SEC(sbi) - + f2fs_usable_segs_in_sec(sbi); + + if (gc_type == BG_GC || one_time) { + unsigned int window_granularity = + sbi->migration_window_granularity; + + if (f2fs_sb_has_blkzoned(sbi) && + !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent)) + window_granularity *= + BOOST_GC_MULTIPLE; + + end_segno = start_segno + window_granularity; + } + + if (end_segno > sec_end_segno) + end_segno = sec_end_segno; + } sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); @@ -1786,7 +1841,8 @@ freed: if (__is_large_section(sbi)) sbi->next_victim_seg[gc_type] = - (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; + (segno + 1 < sec_end_segno) ? + segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } @@ -1863,7 +1919,7 @@ gc_more: goto stop; } retry: - ret = __get_victim(sbi, &segno, gc_type); + ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ if (ret == -ENODATA && gc_type == FG_GC && @@ -1875,17 +1931,21 @@ retry: } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, - gc_control->should_migrate_blocks); + gc_control->should_migrate_blocks, + gc_control->one_time); if (seg_freed < 0) goto stop; total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { + if (seg_freed == f2fs_usable_segs_in_sec(sbi)) { sec_freed++; total_sec_freed++; } + if (gc_control->one_time) + goto stop; + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -2010,8 +2070,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC, - dry_run_sections == 0); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a8ea3301b815..2914b678bf8f 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -15,16 +15,27 @@ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +/* GC sleep parameters for zoned deivces */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10 +#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000 + /* choose candidates from sections which has age of more than 7 days */ #define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7) #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ +#define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ +#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 +#define BOOST_GC_MULTIPLE 5 + #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX @@ -51,6 +62,11 @@ struct f2fs_gc_kthread { * caller of f2fs_balance_fs() * will wait on this wait queue. */ + + /* for gc control for zoned devices */ + unsigned int no_zoned_gc_percent; + unsigned int boost_zoned_gc_percent; + unsigned int valid_thresh_ratio; }; struct gc_inode_list { @@ -152,6 +168,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, *wait -= min_time; } +static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi, + unsigned int limit_perc) +{ + return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100); +} + static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { block_t user_block_count = sbi->user_block_count; @@ -167,3 +189,10 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) free_user_blocks(sbi) < limit_free_user_blocks(invalid_user_blocks)); } + +static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) +{ + if (f2fs_sb_has_blkzoned(sbi)) + return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return has_enough_invalid_blocks(sbi); +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cca7d448e55c..005babf1bed1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -260,35 +260,34 @@ out: return err; } -int f2fs_write_inline_data(struct inode *inode, struct page *page) +int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { - struct dnode_of_data dn; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *ipage; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); - if (err) - return err; + ipage = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); + f2fs_put_page(ipage, 1); return -EAGAIN; } - f2fs_bug_on(F2FS_I_SB(inode), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, dn.inode_page), - page, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(dn.inode_page); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); + memcpy_from_folio(inline_data_addr(inode, ipage), + folio, 0, MAX_INLINE_DATA(inode)); + set_page_dirty(ipage); - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(dn.inode_page); - f2fs_put_dnode(&dn); + clear_page_private_inline(ipage); + f2fs_put_page(ipage, 1); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aef57172014f..1ed86df343a5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/sched/mm.h> #include <linux/lz4.h> @@ -35,6 +34,11 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (f2fs_inode_dirtied(inode, sync)) return; + if (f2fs_is_atomic_file(inode)) { + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return; + } + mark_inode_dirty_sync(inode); } @@ -175,7 +179,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + page_folio(page)->index, ino_of_node(page), + provided, calculated); return provided == calculated; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 38b4750475db..57d46e1439de 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -457,62 +457,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot_name, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -522,7 +466,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -558,17 +501,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b72ef96f7e33..59b13ff243fa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -20,7 +20,7 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -123,7 +123,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } @@ -919,7 +919,7 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = dn->node_page->index; + index = page_folio(dn->node_page)->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), @@ -1369,6 +1369,7 @@ fail: */ static int read_node_page(struct page *page, blk_opf_t op_flags) { + struct folio *folio = page_folio(page); struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { @@ -1381,21 +1382,21 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) }; int err; - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!f2fs_inode_chksum_verify(sbi, page)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -EFSBADCRC; } return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni, false); + err = f2fs_get_node_info(sbi, folio->index, &ni, false); if (err) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -ENOENT; } @@ -1492,7 +1493,7 @@ out_err: out_put_err: /* ENOENT comes from read_node_page which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page->index, NODE); + f2fs_handle_page_eio(sbi, page_folio(page), NODE); f2fs_put_page(page, 1); return ERR_PTR(err); } @@ -1535,7 +1536,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!clear_page_dirty_for_io(page)) goto page_out; - ret = f2fs_write_inline_data(inode, page); + ret = f2fs_write_inline_data(inode, page_folio(page)); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) @@ -1608,6 +1609,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { @@ -1624,15 +1626,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page_folio(page), NODE); + trace_f2fs_writepage(folio, NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1646,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); + f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; @@ -1660,10 +1662,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1674,7 +1676,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ @@ -1684,7 +1686,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, *seq_id = seq; } - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); @@ -1697,7 +1699,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, NODE); @@ -1711,7 +1713,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } @@ -1867,7 +1869,7 @@ continue_unlock: } if (!ret && atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + ino, page_folio(last_page)->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -3166,7 +3168,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, - nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); + F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3185,7 +3187,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (IS_ERR(page)) return PTR_ERR(page); - memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), page_address(page), F2FS_BLKSIZE); f2fs_put_page(page, 1); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 78c3198a6308..1766254279d2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -199,6 +199,10 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; @@ -366,6 +370,10 @@ out: } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); @@ -1282,6 +1290,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, wait_list, issued); return 0; } + + /* + * Issue discard for conventional zones only if the device + * supports discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; } #endif @@ -2686,22 +2701,47 @@ static int get_new_segment(struct f2fs_sb_info *sbi, goto got_it; } +#ifdef CONFIG_BLK_DEV_ZONED /* * If we format f2fs on zoned storage, let's try to get pinned sections * from beginning of the storage, which should be a conventional one. */ if (f2fs_sb_has_blkzoned(sbi)) { - segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); + /* Prioritize writing to conventional zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) + segno = 0; + else + segno = max(first_zoned_segno(sbi), *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } +#endif find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + +#ifdef CONFIG_BLK_DEV_ZONED + if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { + /* Write only to sequential zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { + hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi)); + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + } else + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, 1); + goto out_unlock; + } + } +#endif + if (secno >= MAIN_SECS(sbi)) { secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); if (secno >= MAIN_SECS(sbi)) { ret = -ENOSPC; + f2fs_bug_on(sbi, 1); goto out_unlock; } } @@ -2743,10 +2783,8 @@ got_it: out_unlock: spin_unlock(&free_i->segmap_lock); - if (ret == -ENOSPC) { + if (ret == -ENOSPC) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); - f2fs_bug_on(sbi, 1); - } return ret; } @@ -3052,7 +3090,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3079,7 +3118,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3522,7 +3562,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, fio->page->index); + type = __get_age_segment_type(inode, + page_folio(fio->page)->index); if (type != NO_CHECK_TYPE) return type; @@ -3781,7 +3822,7 @@ out: f2fs_up_read(&fio->sbi->io_order_lock); } -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -3790,20 +3831,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, - .old_blkaddr = page->index, - .new_blkaddr = page->index, - .page = page, + .old_blkaddr = folio->index, + .new_blkaddr = folio->index, + .page = folio_page(folio, 0), .encrypted_page = NULL, .in_list = 0, }; - if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_submit_page_write(&fio); - stat_inc_meta_count(sbi, page->index); + stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } @@ -5381,8 +5422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, return BLKS_PER_SEG(sbi); } -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bfc01a521cb9..71adb4a43bec 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -188,6 +188,7 @@ struct victim_sel_policy { unsigned int min_segno; /* segment # having min. cost */ unsigned long long age; /* mtime of GCed section*/ unsigned long long age_threshold;/* age threshold */ + bool one_time_gc; /* one time GC */ }; struct seg_entry { @@ -430,7 +431,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -464,7 +465,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 176b5177c89d..87ab5696bd48 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -11,7 +11,6 @@ #include <linux/fs_context.h> #include <linux/sched/mm.h> #include <linux/statfs.h> -#include <linux/buffer_head.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/mount.h> @@ -707,6 +706,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (!strcmp(name, "off")) { + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "zoned devices need bggc"); + kfree(name); + return -EINVAL; + } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; @@ -2561,7 +2565,7 @@ restore_opts: static void f2fs_shutdown(struct super_block *sb) { - f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); } #ifdef CONFIG_QUOTA @@ -3318,29 +3322,47 @@ loff_t max_file_blocks(struct inode *inode) * fit within U32_MAX + 1 data units. */ - result = min(result, (((loff_t)U32_MAX + 1) * 4096) >> F2FS_BLKSIZE_BITS); + result = min(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); return result; } -static int __f2fs_commit_super(struct buffer_head *bh, - struct f2fs_super_block *super) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, + pgoff_t index, bool update) { - lock_buffer(bh); - if (super) - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_dirty(bh); - unlock_buffer(bh); - + struct bio *bio; /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA; + int ret; + + folio_lock(folio); + folio_wait_writeback(folio); + if (update) + memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi), + sizeof(struct f2fs_super_block)); + folio_mark_dirty(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); + + bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); + + /* it doesn't need to set crypto context for superblock update */ + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio)); + + if (!bio_add_folio(bio, folio, folio_size(folio), 0)) + f2fs_bug_on(sbi, 1); + + ret = submit_bio_wait(bio); + folio_end_writeback(folio); + + return ret; } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); struct super_block *sb = sbi->sb; u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); @@ -3356,9 +3378,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", @@ -3415,7 +3437,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { - err = __f2fs_commit_super(bh, NULL); + err = __f2fs_commit_super(sbi, folio, index, false); res = err ? "failed" : "done"; } f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)", @@ -3428,12 +3450,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, } static int sanity_check_raw_super(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main; block_t total_sections, blocks_per_seg; - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); size_t crc_offset = 0; __u32 crc = 0; @@ -3591,7 +3612,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ - if (sanity_check_area_boundary(sbi, bh)) + if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; return 0; @@ -3786,6 +3807,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->migration_granularity = SEGS_PER_SEC(sbi); + sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ? + DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; @@ -3938,7 +3961,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, { struct super_block *sb = sbi->sb; int block; - struct buffer_head *bh; + struct folio *folio; struct f2fs_super_block *super; int err = 0; @@ -3947,32 +3970,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, return -ENOMEM; for (block = 0; block < 2; block++) { - bh = sb_bread(sb, block); - if (!bh) { + folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL); + if (IS_ERR(folio)) { f2fs_err(sbi, "Unable to read %dth superblock", block + 1); - err = -EIO; + err = PTR_ERR(folio); *recovery = 1; continue; } /* sanity checking of raw super */ - err = sanity_check_raw_super(sbi, bh); + err = sanity_check_raw_super(sbi, folio, block); if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - brelse(bh); + folio_put(folio); *recovery = 1; continue; } if (!*raw_super) { - memcpy(super, bh->b_data + F2FS_SUPER_OFFSET, + memcpy(super, F2FS_SUPER_BLOCK(folio, block), sizeof(*super)); *valid_super_block = block; *raw_super = super; } - brelse(bh); + folio_put(folio); } /* No valid superblock */ @@ -3986,7 +4009,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { - struct buffer_head *bh; + struct folio *folio; + pgoff_t index; __u32 crc = 0; int err; @@ -4004,22 +4028,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block ? 0 : 1; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; /* write current valid superblock */ - bh = sb_bread(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); return err; } @@ -4173,12 +4199,14 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) @@ -4219,6 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = true; #ifdef CONFIG_BLK_DEV_ZONED sbi->max_open_zones = UINT_MAX; + sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ; #endif for (i = 0; i < max_devices; i++) { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fee7ee45ceaa..c56e8c873935 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -170,6 +170,12 @@ static ssize_t undiscard_blks_show(struct f2fs_attr *a, SM_I(sbi)->dcc_info->undiscard_blks); } +static ssize_t atgc_enabled_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0); +} + static ssize_t gc_mode_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -182,50 +188,50 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (f2fs_sb_has_encrypt(sbi)) - len += scnprintf(buf, PAGE_SIZE - len, "%s", + len += sysfs_emit_at(buf, len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_readonly(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -323,17 +329,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += scnprintf(buf + len, PAGE_SIZE - len, - "cold file extension:\n"); + len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); - len += scnprintf(buf + len, PAGE_SIZE - len, - "hot file extension:\n"); + len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + return len; } @@ -561,6 +564,11 @@ out: return -EINVAL; } + if (!strcmp(a->attr.name, "migration_window_granularity")) { + if (t == 0 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + } + if (!strcmp(a->attr.name, "gc_urgent")) { if (t == 0) { sbi->gc_mode = GC_NORMAL; @@ -627,6 +635,15 @@ out: } #endif +#ifdef CONFIG_BLK_DEV_ZONED + if (!strcmp(a->attr.name, "blkzone_alloc_policy")) { + if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV) + return -EINVAL; + sbi->blkzone_alloc_policy = t; + return count; + } +#endif + #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || !strcmp(a->attr.name, "compr_saved_block")) { @@ -775,7 +792,8 @@ out: if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; - if (t && f2fs_lfs_mode(sbi)) + /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */ + if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE))) return -EINVAL; SM_I(sbi)->ipu_policy = (unsigned int)t; return count; @@ -960,6 +978,9 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -969,6 +990,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); +SM_INFO_GENERAL_RW_ATTR(reserved_segments); /* DCC_INFO ATTR */ DCC_INFO_RW_ATTR(max_small_discards, max_discards); @@ -1001,6 +1023,7 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity); F2FS_SBI_GENERAL_RW_ATTR(dir_level); #ifdef CONFIG_F2FS_IOSTAT F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); @@ -1033,6 +1056,7 @@ F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif /* STAT_INFO ATTR */ @@ -1072,6 +1096,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(atgc_enabled); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_GENERAL_RO_ATTR(moved_blocks_background); @@ -1116,6 +1141,9 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_no_zoned_gc_percent), + ATTR_LIST(gc_boost_zoned_gc_percent), + ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), @@ -1138,8 +1166,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), + ATTR_LIST(reserved_segments), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), + ATTR_LIST(migration_window_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), @@ -1187,6 +1217,7 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), @@ -1200,6 +1231,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_candidate_count), ATTR_LIST(atgc_age_weight), ATTR_LIST(atgc_age_threshold), + ATTR_LIST(atgc_enabled), ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 84a33fe49bed..2287f238ae09 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -74,7 +74,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; - if (pos + count > inode->i_sb->s_maxbytes) + if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode))) return -EFBIG; while (count) { @@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, pos = le64_to_cpu(dloc.pos); /* Get the descriptor */ - if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + if (pos + size < pos || + pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f290fe9327c4..3f3874943679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -629,6 +629,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -772,9 +773,18 @@ retry: if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 01bee2b289c2..b0b821edfd97 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,7 +19,6 @@ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -28,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -278,7 +278,7 @@ struct node_footer { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed794b5fefbe..2851c823095b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -139,7 +139,8 @@ TRACE_DEFINE_ENUM(EX_BLOCK_AGE); { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ { CP_SPEC_LOG_NUM, "log type is 2" }, \ - { CP_RECOVER_DIR, "dir needs recovery" }) + { CP_RECOVER_DIR, "dir needs recovery" }, \ + { CP_XATTR_DIR, "dir's xattr updated" }) #define show_shutdown_mode(type) \ __print_symbolic(type, \ |