author    Kent Overstreet <kent.overstreet@gmail.com>    2019-07-29 13:38:38 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>    2023-10-22 17:08:24 -0400
commit    d1542e0362de069f677dfb0e9336438afb8fae74 (patch)
tree      068c1e5f042cf79ee5bdb5d2901940912a4121a0
parent    7f5e31e1a4c8821b346c9b2cc108ffbdd87778a7 (diff)
bcachefs: Change buffered write path to write to partial pages
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--    fs/bcachefs/fs-io.c    262
1 file changed, 152 insertions, 110 deletions
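Read together, the hunks below replace the old page-granular helpers (bch2_get_page_reservation()/bch2_put_page_reservation()) and the SECTOR_QUOTA_RESERVED sector state with a caller-held struct bch2_page_reservation that covers an arbitrary byte range within a page, rounded to 512-byte sectors. A minimal sketch of the new lifecycle, modeled on the bch2_page_mkwrite() hunk further down (c, inode, page and the byte range offset/len are assumed to be in scope; error handling trimmed):

	struct bch2_page_reservation res;
	int ret;

	bch2_page_reservation_init(c, inode, &res);

	/* reserve disk + quota space for the sectors covered by [offset, offset + len): */
	ret = bch2_page_reservation_get(c, inode, page, &res, offset, len, true);
	if (ret)
		goto out;

	/*
	 * transfer the disk reservation into the page's per-sector state
	 * (s->s[i].replicas_reserved), flip SECTOR_UNALLOCATED sectors to
	 * SECTOR_DIRTY, and account them via the quota reservation:
	 */
	bch2_set_page_dirty(c, inode, page, &res, offset, len);
out:
	/* release whatever bch2_set_page_dirty() did not consume: */
	bch2_page_reservation_put(c, inode, &res);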
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 51b18ec1b1f8..4efe985da96b 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -519,7 +519,6 @@ struct bch_page_sector {
/* i_sectors: */
enum {
SECTOR_UNALLOCATED,
- SECTOR_QUOTA_RESERVED,
SECTOR_DIRTY,
SECTOR_ALLOCATED,
} state:2;
@@ -597,31 +596,6 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
}
-static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
- struct page *page)
-{
- struct bch_page_state *s = bch2_page_state(page);
- struct disk_reservation disk_res = { 0 };
- struct quota_res quota_res = { 0 };
- unsigned i;
-
- if (!s)
- return;
-
- for (i = 0; i < ARRAY_SIZE(s->s); i++) {
- disk_res.sectors += s->s[i].replicas_reserved;
- s->s[i].replicas_reserved = 0;
-
- if (s->s[i].state == SECTOR_QUOTA_RESERVED) {
- quota_res.sectors++;
- s->s[i].state = SECTOR_UNALLOCATED;
- }
- }
-
- bch2_quota_reservation_put(c, inode, &quota_res);
- bch2_disk_reservation_put(c, &disk_res);
-}
-
static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
{
/* XXX: this should not be open coded */
@@ -672,100 +646,134 @@ static int bch2_get_page_disk_reservation(struct bch_fs *c,
return 0;
}
-static int bch2_get_page_quota_reservation(struct bch_fs *c,
+struct bch2_page_reservation {
+ struct disk_reservation disk;
+ struct quota_res quota;
+};
+
+static void bch2_page_reservation_init(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct bch2_page_reservation *res)
+{
+ memset(res, 0, sizeof(*res));
+
+ res->disk.nr_replicas = inode_nr_replicas(c, inode);
+}
+
+static void bch2_page_reservation_put(struct bch_fs *c,
struct bch_inode_info *inode,
- struct page *page, bool check_enospc)
+ struct bch2_page_reservation *res)
+{
+ bch2_disk_reservation_put(c, &res->disk);
+ bch2_quota_reservation_put(c, inode, &res->quota);
+}
+
+static int bch2_page_reservation_get(struct bch_fs *c,
+ struct bch_inode_info *inode, struct page *page,
+ struct bch2_page_reservation *res,
+ unsigned offset, unsigned len, bool check_enospc)
{
struct bch_page_state *s = bch2_page_state_create(page, 0);
- struct quota_res quota_res = { 0 };
- unsigned i, quota_res_sectors = 0;
+ unsigned i, disk_sectors = 0, quota_sectors = 0;
int ret;
if (!s)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(s->s); i++)
- quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED;
-
- if (!quota_res_sectors)
- return 0;
+ for (i = offset / 512;
+ i < DIV_ROUND_UP(offset + len, 512);
+ i++) {
+ disk_sectors += sectors_to_reserve(&s->s[i],
+ res->disk.nr_replicas);
+ quota_sectors += s->s[i].state == SECTOR_UNALLOCATED;
+ }
- ret = bch2_quota_reservation_add(c, inode, &quota_res,
- quota_res_sectors,
- check_enospc);
- if (unlikely(ret))
- return ret;
+ if (disk_sectors) {
+ ret = bch2_disk_reservation_add(c, &res->disk,
+ disk_sectors,
+ !check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL
+ : 0);
+ if (unlikely(ret))
+ return ret;
+ }
- for (i = 0; i < ARRAY_SIZE(s->s); i++)
- if (s->s[i].state == SECTOR_UNALLOCATED)
- s->s[i].state = SECTOR_QUOTA_RESERVED;
+ if (quota_sectors) {
+ ret = bch2_quota_reservation_add(c, inode, &res->quota,
+ quota_sectors,
+ check_enospc);
+ if (unlikely(ret)) {
+ struct disk_reservation tmp = {
+ .sectors = disk_sectors
+ };
+
+ bch2_disk_reservation_put(c, &tmp);
+ res->disk.sectors -= disk_sectors;
+ return ret;
+ }
+ }
return 0;
}
-static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
- struct page *page, bool check_enospc)
-{
- return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?:
- bch2_get_page_quota_reservation(c, inode, page, check_enospc);
-}
-
static void bch2_clear_page_bits(struct page *page)
{
struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(page);
+ struct disk_reservation disk_res = { 0 };
int i, dirty_sectors = 0;
if (!s)
return;
for (i = 0; i < ARRAY_SIZE(s->s); i++) {
+ disk_res.sectors += s->s[i].replicas_reserved;
+ s->s[i].replicas_reserved = 0;
+
if (s->s[i].state == SECTOR_DIRTY) {
dirty_sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
+ bch2_disk_reservation_put(c, &disk_res);
+
if (dirty_sectors)
i_sectors_acct(c, inode, NULL, -dirty_sectors);
- bch2_put_page_reservation(c, inode, page);
bch2_page_state_release(page);
}
-static void __bch2_set_page_dirty(struct address_space *mapping, struct folio *folio)
+static void bch2_set_page_dirty(struct bch_fs *c,
+ struct bch_inode_info *inode, struct page *page,
+ struct bch2_page_reservation *res,
+ unsigned offset, unsigned len)
{
- struct bch_inode_info *inode = to_bch_ei(mapping->host);
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch_page_state *s = bch2_page_state(&folio->page);
- struct quota_res quota_res = { 0 };
+ struct bch_page_state *s = bch2_page_state(page);
unsigned i, dirty_sectors = 0;
- BUG_ON(!s);
+ for (i = offset / 512;
+ i < DIV_ROUND_UP(offset + len, 512);
+ i++) {
+ unsigned sectors = sectors_to_reserve(&s->s[i],
+ res->disk.nr_replicas);
- for (i = 0; i < ARRAY_SIZE(s->s); i++) {
- if (s->s[i].state == SECTOR_QUOTA_RESERVED)
- quota_res.sectors++;
+ BUG_ON(sectors > res->disk.sectors);
+ s->s[i].replicas_reserved += sectors;
+ res->disk.sectors -= sectors;
- if (s->s[i].state == SECTOR_UNALLOCATED ||
- s->s[i].state == SECTOR_QUOTA_RESERVED) {
+ if (s->s[i].state == SECTOR_UNALLOCATED) {
s->s[i].state = SECTOR_DIRTY;
dirty_sectors++;
}
}
if (dirty_sectors)
- i_sectors_acct(c, inode, &quota_res, dirty_sectors);
- bch2_quota_reservation_put(c, inode, &quota_res);
-}
-
-static void bch2_set_page_dirty(struct address_space *mapping, struct page *page)
-{
- struct folio *folio = page_folio(page);
+ i_sectors_acct(c, inode, &res->quota, dirty_sectors);
- __bch2_set_page_dirty(mapping, folio);
- filemap_dirty_folio(mapping, folio);
+ if (!PageDirty(page))
+ filemap_dirty_folio(inode->v.i_mapping, page_folio(page));
}
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
@@ -788,8 +796,11 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_page_reservation res;
int ret = VM_FAULT_LOCKED;
+ bch2_page_reservation_init(c, inode, &res);
+
sb_start_pagefault(inode->v.i_sb);
file_update_time(file);
@@ -809,18 +820,21 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
goto out;
}
- if (bch2_get_page_reservation(c, inode, page, true)) {
+ if (bch2_page_reservation_get(c, inode, page, &res,
+ 0, PAGE_SIZE, true)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
}
- if (!PageDirty(page))
- bch2_set_page_dirty(mapping, page);
+ bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
wait_for_stable_page(page);
out:
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
sb_end_pagefault(inode->v.i_sb);
+
+ bch2_page_reservation_put(c, inode, &res);
+
return ret;
}
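The bch2_write_begin()/bch2_write_end() hunks that follow thread the reservation through the fsdata cookie: write_begin allocates it and reserves space for the range about to be written, write_end dirties only the bytes actually copied and then releases the remainder. In outline (fragments condensed from the hunks below; error paths omitted):

	/* in bch2_write_begin(): allocate, initialize, hand off via *fsdata */
	struct bch2_page_reservation *res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return -ENOMEM;
	bch2_page_reservation_init(c, inode, res);
	*fsdata = res;
	...
	ret = bch2_page_reservation_get(c, inode, page, res, offset, len, true);

	/* in bch2_write_end(): consume for the copied range, then release and free */
	struct bch2_page_reservation *res = fsdata;
	bch2_set_page_dirty(c, inode, page, res, offset, copied);
	bch2_page_reservation_put(c, inode, res);
	kfree(res);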
@@ -1450,12 +1464,18 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_page_reservation *res;
pgoff_t index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1);
struct page *page;
int ret = -ENOMEM;
- BUG_ON(inode_unhashed(&inode->v));
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ bch2_page_reservation_init(c, inode, res);
+ *fsdata = res;
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
@@ -1486,7 +1506,8 @@ readpage:
if (ret)
goto err;
out:
- ret = bch2_get_page_reservation(c, inode, page, true);
+ ret = bch2_page_reservation_get(c, inode, page, res,
+ offset, len, true);
if (ret) {
if (!PageUptodate(page)) {
/*
@@ -1509,6 +1530,8 @@ err:
*pagep = NULL;
err_unlock:
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+ kfree(res);
+ *fsdata = NULL;
return ret;
}
@@ -1518,6 +1541,8 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_page_reservation *res = fsdata;
+ unsigned offset = pos & (PAGE_SIZE - 1);
lockdep_assert_held(&inode->v.i_rwsem);
@@ -1540,18 +1565,19 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
if (copied) {
if (!PageUptodate(page))
SetPageUptodate(page);
- if (!PageDirty(page))
- bch2_set_page_dirty(mapping, page);
+
+ bch2_set_page_dirty(c, inode, page, res, offset, copied);
inode->ei_last_dirtied = (unsigned long) current;
- } else {
- bch2_put_page_reservation(c, inode, page);
}
unlock_page(page);
put_page(page);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+ bch2_page_reservation_put(c, inode, res);
+ kfree(res);
+
return copied;
}
@@ -1564,15 +1590,19 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct page *pages[WRITE_BATCH_PAGES];
+ struct bch2_page_reservation res;
unsigned long index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- unsigned i, copied = 0, nr_pages_copied = 0;
+ unsigned i, reserved = 0, set_dirty = 0;
+ unsigned copied = 0, nr_pages_copied = 0;
int ret = 0;
BUG_ON(!len);
BUG_ON(nr_pages > ARRAY_SIZE(pages));
+ bch2_page_reservation_init(c, inode, &res);
+
for (i = 0; i < nr_pages; i++) {
pages[i] = grab_cache_page_write_begin(mapping, index + i);
if (!pages[i]) {
@@ -1599,19 +1629,25 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
}
- for (i = 0; i < nr_pages; i++) {
- ret = bch2_get_page_reservation(c, inode, pages[i], true);
-
- if (ret && !PageUptodate(pages[i])) {
- ret = bch2_read_single_page(pages[i], mapping);
- if (ret)
- goto out;
-
- ret = bch2_get_page_reservation(c, inode, pages[i], true);
+ while (reserved < len) {
+ struct page *page = pages[(offset + reserved) >> PAGE_SHIFT];
+ unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
+ unsigned pg_len = min_t(unsigned, len - reserved,
+ PAGE_SIZE - pg_offset);
+retry_reservation:
+ ret = bch2_page_reservation_get(c, inode, page, &res,
+ pg_offset, pg_len, true);
+
+ if (ret && !PageUptodate(page)) {
+ ret = bch2_read_single_page(page, mapping);
+ if (!ret)
+ goto retry_reservation;
}
if (ret)
goto out;
+
+ reserved += pg_len;
}
if (mapping_writably_mapped(mapping))
@@ -1621,16 +1657,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
while (copied < len) {
struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
- unsigned pg_bytes = min_t(unsigned, len - copied,
- PAGE_SIZE - pg_offset);
+ unsigned pg_len = min_t(unsigned, len - copied,
+ PAGE_SIZE - pg_offset);
unsigned pg_copied = copy_page_from_iter_atomic(page,
- pg_offset, pg_bytes, iter);
+ pg_offset, pg_len, iter);
+
+ if (!pg_copied)
+ break;
flush_dcache_page(page);
copied += pg_copied;
-
- if (pg_copied != pg_bytes)
- break;
}
if (!copied)
@@ -1653,23 +1689,30 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
copied -= (offset + copied) & (PAGE_SIZE - 1);
}
}
-out:
- for (i = 0; i < nr_pages_copied; i++) {
- if (!PageUptodate(pages[i]))
- SetPageUptodate(pages[i]);
- if (!PageDirty(pages[i]))
- bch2_set_page_dirty(mapping, pages[i]);
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ while (set_dirty < copied) {
+ struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
+ unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
+ unsigned pg_len = min_t(unsigned, copied - set_dirty,
+ PAGE_SIZE - pg_offset);
+
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+
+ bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
+ unlock_page(page);
+ put_page(page);
+
+ set_dirty += pg_len;
+ }
+out:
for (i = nr_pages_copied; i < nr_pages; i++) {
- if (!PageDirty(pages[i]))
- bch2_put_page_reservation(c, inode, pages[i]);
unlock_page(pages[i]);
put_page(pages[i]);
}
+ bch2_page_reservation_put(c, inode, &res);
+
return copied ?: ret;
}
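For reference, the three new loops in __bch2_buffered_write() above (reserve, copy, mark dirty) walk the byte range across the page array with the same arithmetic; a standalone sketch, with done standing in for reserved/copied/set_dirty (the last loop is bounded by copied rather than len):

	unsigned done = 0;

	while (done < len) {
		struct page *page = pages[(offset + done) >> PAGE_SHIFT];
		unsigned pg_offset = (offset + done) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, len - done,
					PAGE_SIZE - pg_offset);

		/* operate on bytes [pg_offset, pg_offset + pg_len) of this page */

		done += pg_len;
	}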
@@ -2322,7 +2365,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this is wrong. ick.
*/
- ret = bch2_get_page_reservation(c, inode, page, false);
+ ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret);
if (index == start >> PAGE_SHIFT &&
@@ -2333,8 +2376,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
else if (index == end >> PAGE_SHIFT)
zero_user_segment(page, 0, end_offset);
- if (!PageDirty(page))
- bch2_set_page_dirty(mapping, page);
+ filemap_dirty_folio(mapping, page_folio(page));
unlock:
unlock_page(page);
put_page(page);