diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-07-29 13:38:38 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:24 -0400 |
commit | d1542e0362de069f677dfb0e9336438afb8fae74 (patch) | |
tree | 068c1e5f042cf79ee5bdb5d2901940912a4121a0 | |
parent | 7f5e31e1a4c8821b346c9b2cc108ffbdd87778a7 (diff) | |
download | lwn-d1542e0362de069f677dfb0e9336438afb8fae74.tar.gz lwn-d1542e0362de069f677dfb0e9336438afb8fae74.zip |
bcachefs: Change buffered write path to write to partial pages
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/fs-io.c | 262 |
1 files changed, 152 insertions, 110 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 51b18ec1b1f8..4efe985da96b 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -519,7 +519,6 @@ struct bch_page_sector { /* i_sectors: */ enum { SECTOR_UNALLOCATED, - SECTOR_QUOTA_RESERVED, SECTOR_DIRTY, SECTOR_ALLOCATED, } state:2; @@ -597,31 +596,6 @@ static struct bch_page_state *bch2_page_state_create(struct page *page, return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); } -static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, - struct page *page) -{ - struct bch_page_state *s = bch2_page_state(page); - struct disk_reservation disk_res = { 0 }; - struct quota_res quota_res = { 0 }; - unsigned i; - - if (!s) - return; - - for (i = 0; i < ARRAY_SIZE(s->s); i++) { - disk_res.sectors += s->s[i].replicas_reserved; - s->s[i].replicas_reserved = 0; - - if (s->s[i].state == SECTOR_QUOTA_RESERVED) { - quota_res.sectors++; - s->s[i].state = SECTOR_UNALLOCATED; - } - } - - bch2_quota_reservation_put(c, inode, &quota_res); - bch2_disk_reservation_put(c, &disk_res); -} - static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) { /* XXX: this should not be open coded */ @@ -672,100 +646,134 @@ static int bch2_get_page_disk_reservation(struct bch_fs *c, return 0; } -static int bch2_get_page_quota_reservation(struct bch_fs *c, +struct bch2_page_reservation { + struct disk_reservation disk; + struct quota_res quota; +}; + +static void bch2_page_reservation_init(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch2_page_reservation *res) +{ + memset(res, 0, sizeof(*res)); + + res->disk.nr_replicas = inode_nr_replicas(c, inode); +} + +static void bch2_page_reservation_put(struct bch_fs *c, struct bch_inode_info *inode, - struct page *page, bool check_enospc) + struct bch2_page_reservation *res) +{ + bch2_disk_reservation_put(c, &res->disk); + bch2_quota_reservation_put(c, inode, &res->quota); +} + +static int 
bch2_page_reservation_get(struct bch_fs *c, + struct bch_inode_info *inode, struct page *page, + struct bch2_page_reservation *res, + unsigned offset, unsigned len, bool check_enospc) { struct bch_page_state *s = bch2_page_state_create(page, 0); - struct quota_res quota_res = { 0 }; - unsigned i, quota_res_sectors = 0; + unsigned i, disk_sectors = 0, quota_sectors = 0; int ret; if (!s) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(s->s); i++) - quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED; - - if (!quota_res_sectors) - return 0; + for (i = offset / 512; + i < DIV_ROUND_UP(offset + len, 512); + i++) { + disk_sectors += sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); + quota_sectors += s->s[i].state == SECTOR_UNALLOCATED; + } - ret = bch2_quota_reservation_add(c, inode, &quota_res, - quota_res_sectors, - check_enospc); - if (unlikely(ret)) - return ret; + if (disk_sectors) { + ret = bch2_disk_reservation_add(c, &res->disk, + disk_sectors, + !check_enospc + ? BCH_DISK_RESERVATION_NOFAIL + : 0); + if (unlikely(ret)) + return ret; + } - for (i = 0; i < ARRAY_SIZE(s->s); i++) - if (s->s[i].state == SECTOR_UNALLOCATED) - s->s[i].state = SECTOR_QUOTA_RESERVED; + if (quota_sectors) { + ret = bch2_quota_reservation_add(c, inode, &res->quota, + quota_sectors, + check_enospc); + if (unlikely(ret)) { + struct disk_reservation tmp = { + .sectors = disk_sectors + }; + + bch2_disk_reservation_put(c, &tmp); + res->disk.sectors -= disk_sectors; + return ret; + } + } return 0; } -static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, - struct page *page, bool check_enospc) -{ - return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?: - bch2_get_page_quota_reservation(c, inode, page, check_enospc); -} - static void bch2_clear_page_bits(struct page *page) { struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_page_state *s = bch2_page_state(page); + struct 
disk_reservation disk_res = { 0 }; int i, dirty_sectors = 0; if (!s) return; for (i = 0; i < ARRAY_SIZE(s->s); i++) { + disk_res.sectors += s->s[i].replicas_reserved; + s->s[i].replicas_reserved = 0; + if (s->s[i].state == SECTOR_DIRTY) { dirty_sectors++; s->s[i].state = SECTOR_UNALLOCATED; } } + bch2_disk_reservation_put(c, &disk_res); + if (dirty_sectors) i_sectors_acct(c, inode, NULL, -dirty_sectors); - bch2_put_page_reservation(c, inode, page); bch2_page_state_release(page); } -static void __bch2_set_page_dirty(struct address_space *mapping, struct folio *folio) +static void bch2_set_page_dirty(struct bch_fs *c, + struct bch_inode_info *inode, struct page *page, + struct bch2_page_reservation *res, + unsigned offset, unsigned len) { - struct bch_inode_info *inode = to_bch_ei(mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_page_state *s = bch2_page_state(&folio->page); - struct quota_res quota_res = { 0 }; + struct bch_page_state *s = bch2_page_state(page); unsigned i, dirty_sectors = 0; - BUG_ON(!s); + for (i = offset / 512; + i < DIV_ROUND_UP(offset + len, 512); + i++) { + unsigned sectors = sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); - for (i = 0; i < ARRAY_SIZE(s->s); i++) { - if (s->s[i].state == SECTOR_QUOTA_RESERVED) - quota_res.sectors++; + BUG_ON(sectors > res->disk.sectors); + s->s[i].replicas_reserved += sectors; + res->disk.sectors -= sectors; - if (s->s[i].state == SECTOR_UNALLOCATED || - s->s[i].state == SECTOR_QUOTA_RESERVED) { + if (s->s[i].state == SECTOR_UNALLOCATED) { s->s[i].state = SECTOR_DIRTY; dirty_sectors++; } } if (dirty_sectors) - i_sectors_acct(c, inode, &quota_res, dirty_sectors); - bch2_quota_reservation_put(c, inode, &quota_res); -} - -static void bch2_set_page_dirty(struct address_space *mapping, struct page *page) -{ - struct folio *folio = page_folio(page); + i_sectors_acct(c, inode, &res->quota, dirty_sectors); - __bch2_set_page_dirty(mapping, folio); - filemap_dirty_folio(mapping, folio); + if 
(!PageDirty(page)) + filemap_dirty_folio(inode->v.i_mapping, page_folio(page)); } vm_fault_t bch2_page_fault(struct vm_fault *vmf) @@ -788,8 +796,11 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) struct bch_inode_info *inode = file_bch_inode(file); struct address_space *mapping = file->f_mapping; struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_page_reservation res; int ret = VM_FAULT_LOCKED; + bch2_page_reservation_init(c, inode, &res); + sb_start_pagefault(inode->v.i_sb); file_update_time(file); @@ -809,18 +820,21 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) goto out; } - if (bch2_get_page_reservation(c, inode, page, true)) { + if (bch2_page_reservation_get(c, inode, page, &res, + 0, PAGE_SIZE, true)) { unlock_page(page); ret = VM_FAULT_SIGBUS; goto out; } - if (!PageDirty(page)) - bch2_set_page_dirty(mapping, page); + bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE); wait_for_stable_page(page); out: bch2_pagecache_add_put(&inode->ei_pagecache_lock); sb_end_pagefault(inode->v.i_sb); + + bch2_page_reservation_put(c, inode, &res); + return ret; } @@ -1450,12 +1464,18 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, { struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_page_reservation *res; pgoff_t index = pos >> PAGE_SHIFT; unsigned offset = pos & (PAGE_SIZE - 1); struct page *page; int ret = -ENOMEM; - BUG_ON(inode_unhashed(&inode->v)); + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + bch2_page_reservation_init(c, inode, res); + *fsdata = res; bch2_pagecache_add_get(&inode->ei_pagecache_lock); @@ -1486,7 +1506,8 @@ readpage: if (ret) goto err; out: - ret = bch2_get_page_reservation(c, inode, page, true); + ret = bch2_page_reservation_get(c, inode, page, res, + offset, len, true); if (ret) { if (!PageUptodate(page)) { /* @@ -1509,6 +1530,8 @@ err: *pagep = NULL; err_unlock: 
bch2_pagecache_add_put(&inode->ei_pagecache_lock); + kfree(res); + *fsdata = NULL; return ret; } @@ -1518,6 +1541,8 @@ int bch2_write_end(struct file *file, struct address_space *mapping, { struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_page_reservation *res = fsdata; + unsigned offset = pos & (PAGE_SIZE - 1); lockdep_assert_held(&inode->v.i_rwsem); @@ -1540,18 +1565,19 @@ int bch2_write_end(struct file *file, struct address_space *mapping, if (copied) { if (!PageUptodate(page)) SetPageUptodate(page); - if (!PageDirty(page)) - bch2_set_page_dirty(mapping, page); + + bch2_set_page_dirty(c, inode, page, res, offset, copied); inode->ei_last_dirtied = (unsigned long) current; - } else { - bch2_put_page_reservation(c, inode, page); } unlock_page(page); put_page(page); bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_page_reservation_put(c, inode, res); + kfree(res); + return copied; } @@ -1564,15 +1590,19 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct page *pages[WRITE_BATCH_PAGES]; + struct bch2_page_reservation res; unsigned long index = pos >> PAGE_SHIFT; unsigned offset = pos & (PAGE_SIZE - 1); unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - unsigned i, copied = 0, nr_pages_copied = 0; + unsigned i, reserved = 0, set_dirty = 0; + unsigned copied = 0, nr_pages_copied = 0; int ret = 0; BUG_ON(!len); BUG_ON(nr_pages > ARRAY_SIZE(pages)); + bch2_page_reservation_init(c, inode, &res); + for (i = 0; i < nr_pages; i++) { pages[i] = grab_cache_page_write_begin(mapping, index + i); if (!pages[i]) { @@ -1599,19 +1629,25 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, } } - for (i = 0; i < nr_pages; i++) { - ret = bch2_get_page_reservation(c, inode, pages[i], true); - - if (ret && !PageUptodate(pages[i])) { - ret = bch2_read_single_page(pages[i], mapping); - if (ret) - goto out; - - ret = 
bch2_get_page_reservation(c, inode, pages[i], true); + while (reserved < len) { + struct page *page = pages[(offset + reserved) >> PAGE_SHIFT]; + unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1); + unsigned pg_len = min_t(unsigned, len - reserved, + PAGE_SIZE - pg_offset); +retry_reservation: + ret = bch2_page_reservation_get(c, inode, page, &res, + pg_offset, pg_len, true); + + if (ret && !PageUptodate(page)) { + ret = bch2_read_single_page(page, mapping); + if (!ret) + goto retry_reservation; } if (ret) goto out; + + reserved += pg_len; } if (mapping_writably_mapped(mapping)) @@ -1621,16 +1657,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, while (copied < len) { struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1); - unsigned pg_bytes = min_t(unsigned, len - copied, - PAGE_SIZE - pg_offset); + unsigned pg_len = min_t(unsigned, len - copied, + PAGE_SIZE - pg_offset); unsigned pg_copied = copy_page_from_iter_atomic(page, - pg_offset, pg_bytes, iter); + pg_offset, pg_len, iter); + + if (!pg_copied) + break; flush_dcache_page(page); copied += pg_copied; - - if (pg_copied != pg_bytes) - break; } if (!copied) @@ -1653,23 +1689,30 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, copied -= (offset + copied) & (PAGE_SIZE - 1); } } -out: - for (i = 0; i < nr_pages_copied; i++) { - if (!PageUptodate(pages[i])) - SetPageUptodate(pages[i]); - if (!PageDirty(pages[i])) - bch2_set_page_dirty(mapping, pages[i]); - unlock_page(pages[i]); - put_page(pages[i]); - } + while (set_dirty < copied) { + struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT]; + unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1); + unsigned pg_len = min_t(unsigned, copied - set_dirty, + PAGE_SIZE - pg_offset); + + if (!PageUptodate(page)) + SetPageUptodate(page); + + bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len); + unlock_page(page); + put_page(page); + + set_dirty 
+= pg_len; + } +out: for (i = nr_pages_copied; i < nr_pages; i++) { - if (!PageDirty(pages[i])) - bch2_put_page_reservation(c, inode, pages[i]); unlock_page(pages[i]); put_page(pages[i]); } + bch2_page_reservation_put(c, inode, &res); + return copied ?: ret; } @@ -2322,7 +2365,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, * XXX: because we aren't currently tracking whether the page has actual * data in it (vs. just 0s, or only partially written) this wrong. ick. */ - ret = bch2_get_page_reservation(c, inode, page, false); + ret = bch2_get_page_disk_reservation(c, inode, page, false); BUG_ON(ret); if (index == start >> PAGE_SHIFT && @@ -2333,8 +2376,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, else if (index == end >> PAGE_SHIFT) zero_user_segment(page, 0, end_offset); - if (!PageDirty(page)) - bch2_set_page_dirty(mapping, page); + filemap_dirty_folio(mapping, page_folio(page)); unlock: unlock_page(page); put_page(page); |