summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-26 12:41:27 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-26 12:41:27 -0800
commitfb527fc1f36e252cd1f62a26be4906949e7708ff (patch)
tree3066b8c59d8c7b37805d9ee2f50ed6db00a8c397
parentff2a7a064a69069554564f52b6a84fc8a8c7d688 (diff)
parentd1dfb5f52ffc4a142d88da5c0ed0514f3602c4b8 (diff)
downloadlwn-fb527fc1f36e252cd1f62a26be4906949e7708ff.tar.gz
lwn-fb527fc1f36e252cd1f62a26be4906949e7708ff.zip
Merge tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: - Add page -> folio conversions (Joanne Koong, Josef Bacik) - Allow max size of fuse requests to be configurable with a sysctl (Joanne Koong) - Allow FOPEN_DIRECT_IO to take advantage of async code path (yangyun) - Fix large kernel reads (like a module load) in virtio_fs (Hou Tao) - Fix attribute inconsistency in case readdirplus (and plain lookup in corner cases) is racing with inode eviction (Zhang Tianci) - Fix a WARN_ON triggered by virtio_fs (Asahi Lina) * tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (30 commits) virtiofs: dax: remove ->writepages() callback fuse: check attributes staleness on fuse_iget() fuse: remove pages for requests and exclusively use folios fuse: convert direct io to use folios mm/writeback: add folio_mark_dirty_lock() fuse: convert writebacks to use folios fuse: convert retrieves to use folios fuse: convert ioctls to use folios fuse: convert writes (non-writeback) to use folios fuse: convert reads to use folios fuse: convert readdir to use folios fuse: convert readlink to use folios fuse: convert cuse to use folios fuse: add support in virtio for requests using folios fuse: support folios in struct fuse_args_pages and fuse_copy_pages() fuse: convert fuse_notify_store to use folios fuse: convert fuse_retrieve to use folios fuse: use the folio based vmstat helpers fuse: convert fuse_writepage_need_send to take a folio fuse: convert fuse_do_readpage to use folios ...
-rw-r--r--Documentation/admin-guide/sysctl/fs.rst10
-rw-r--r--fs/fuse/Makefile1
-rw-r--r--fs/fuse/cuse.c29
-rw-r--r--fs/fuse/dax.c11
-rw-r--r--fs/fuse/dev.c66
-rw-r--r--fs/fuse/dir.c37
-rw-r--r--fs/fuse/file.c449
-rw-r--r--fs/fuse/fuse_i.h68
-rw-r--r--fs/fuse/inode.c67
-rw-r--r--fs/fuse/ioctl.c35
-rw-r--r--fs/fuse/readdir.c33
-rw-r--r--fs/fuse/sysctl.c40
-rw-r--r--fs/fuse/virtio_fs.c77
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/folio-compat.c6
-rw-r--r--mm/page-writeback.c22
16 files changed, 578 insertions, 374 deletions
diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst
index 30c61474dec5..f5ec6c9312e1 100644
--- a/Documentation/admin-guide/sysctl/fs.rst
+++ b/Documentation/admin-guide/sysctl/fs.rst
@@ -337,3 +337,13 @@ Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
on a 64-bit one.
The current default value for ``max_user_watches`` is 4% of the
available low memory, divided by the "watch" cost in bytes.
+
+5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
+=====================================================================
+
+This directory contains the following configuration options for FUSE
+filesystems:
+
+``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
+setting/getting the maximum number of pages that can be used for servicing
+requests in FUSE.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index ce0ff7a9007b..2c372180d631 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -14,5 +14,6 @@ fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
+fuse-$(CONFIG_SYSCTL) += sysctl.o
virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 0b2da7b7e2ad..b39844d75a80 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -303,8 +303,8 @@ struct cuse_init_args {
struct fuse_args_pages ap;
struct cuse_init_in in;
struct cuse_init_out out;
- struct page *page;
- struct fuse_page_desc desc;
+ struct folio *folio;
+ struct fuse_folio_desc desc;
};
/**
@@ -326,7 +326,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
struct cuse_init_out *arg = &ia->out;
- struct page *page = ap->pages[0];
+ struct folio *folio = ap->folios[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
@@ -343,7 +343,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
/* parse init reply */
cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
- rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
+ rc = cuse_parse_devinfo(folio_address(folio), ap->args.out_args[1].size,
&devinfo);
if (rc)
goto err;
@@ -411,7 +411,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
kfree(ia);
- __free_page(page);
+ folio_put(folio);
return;
err_cdev:
@@ -429,7 +429,7 @@ err:
static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
- struct page *page;
+ struct folio *folio;
struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
@@ -437,13 +437,14 @@ static int cuse_send_init(struct cuse_conn *cc)
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
rc = -ENOMEM;
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
+
+ folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);
+ if (!folio)
goto err;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (!ia)
- goto err_free_page;
+ goto err_free_folio;
ap = &ia->ap;
ia->in.major = FUSE_KERNEL_VERSION;
@@ -459,18 +460,18 @@ static int cuse_send_init(struct cuse_conn *cc)
ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
ap->args.out_argvar = true;
ap->args.out_pages = true;
- ap->num_pages = 1;
- ap->pages = &ia->page;
+ ap->num_folios = 1;
+ ap->folios = &ia->folio;
ap->descs = &ia->desc;
- ia->page = page;
+ ia->folio = folio;
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
-err_free_page:
- __free_page(page);
+err_free_folio:
+ folio_put(folio);
}
err:
return rc;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 12ef91d170bb..9abbc2f2894f 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -774,16 +774,6 @@ out:
return ret;
}
-static int fuse_dax_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
-
- struct inode *inode = mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
-
- return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
-}
-
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
bool write)
{
@@ -1323,7 +1313,6 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
}
static const struct address_space_operations fuse_dax_file_aops = {
- .writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
};
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0723c6344b20..27ccae63495d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1028,17 +1028,27 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
struct fuse_req *req = cs->req;
struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
-
- for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
+ for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
+ struct page *orig, *pagep;
+
+ orig = pagep = &ap->folios[i]->page;
- err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
+ err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
if (err)
return err;
nbytes -= count;
+
+ /*
+ * fuse_copy_page may have moved a page from a pipe instead of
+ * copying into our given page, so update the folios if it was
+ * replaced.
+ */
+ if (pagep != orig)
+ ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1654,24 +1664,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
+ struct folio *folio;
struct page *page;
unsigned int this_num;
- err = -ENOMEM;
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
- if (!page)
+ folio = filemap_grab_folio(mapping, index);
+ err = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out_iput;
- this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ page = &folio->page;
+ this_num = min_t(unsigned, num, folio_size(folio) - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!PageUptodate(page) && !err && offset == 0 &&
- (this_num == PAGE_SIZE || file_size == end)) {
- zero_user_segment(page, this_num, PAGE_SIZE);
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio) && !err && offset == 0 &&
+ (this_num == folio_size(folio) || file_size == end)) {
+ folio_zero_segment(folio, this_num, folio_size(folio));
+ folio_mark_uptodate(folio);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (err)
goto out_iput;
@@ -1703,7 +1714,7 @@ static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_retrieve_args *ra =
container_of(args, typeof(*ra), ap.args);
- release_pages(ra->ap.pages, ra->ap.num_pages);
+ release_pages(ra->ap.folios, ra->ap.num_folios);
kfree(ra);
}
@@ -1717,7 +1728,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- unsigned int num_pages;
+ unsigned int num_pages, cur_pages = 0;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@@ -1736,15 +1747,15 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
- args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
+ args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
ra = kzalloc(args_size, GFP_KERNEL);
if (!ra)
return -ENOMEM;
ap = &ra->ap;
- ap->pages = (void *) (ra + 1);
- ap->descs = (void *) (ap->pages + num_pages);
+ ap->folios = (void *) (ra + 1);
+ ap->descs = (void *) (ap->folios + num_pages);
args = &ap->args;
args->nodeid = outarg->nodeid;
@@ -1755,19 +1766,20 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
- while (num && ap->num_pages < num_pages) {
- struct page *page;
+ while (num && cur_pages < num_pages) {
+ struct folio *folio;
unsigned int this_num;
- page = find_get_page(mapping, index);
- if (!page)
+ folio = filemap_get_folio(mapping, index);
+ if (IS_ERR(folio))
break;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].offset = offset;
- ap->descs[ap->num_pages].length = this_num;
- ap->num_pages++;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = offset;
+ ap->descs[ap->num_folios].length = this_num;
+ ap->num_folios++;
+ cur_pages++;
offset = 0;
num -= this_num;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 54104dd48af7..494ac372ace0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -366,7 +366,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
- u64 attr_version;
+ u64 attr_version, evict_ctr;
int err;
*inode = NULL;
@@ -381,6 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out;
attr_version = fuse_get_attr_version(fm->fc);
+ evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
err = fuse_simple_request(fm, &args);
@@ -398,7 +399,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, ATTR_TIMEOUT(outarg),
- attr_version);
+ attr_version, evict_ctr);
err = -ENOMEM;
if (!*inode) {
fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
@@ -691,7 +692,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
ff->nodeid = outentry.nodeid;
ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
- &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
+ &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
@@ -822,7 +823,7 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
+ &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
if (!inode) {
fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
@@ -1585,13 +1586,13 @@ static int fuse_permission(struct mnt_idmap *idmap,
return err;
}
-static int fuse_readlink_page(struct inode *inode, struct page *page)
+static int fuse_readlink_page(struct inode *inode, struct folio *folio)
{
struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
+ struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = {
- .num_pages = 1,
- .pages = &page,
+ .num_folios = 1,
+ .folios = &folio,
.descs = &desc,
};
char *link;
@@ -1614,7 +1615,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page)
if (WARN_ON(res >= PAGE_SIZE))
return -EIO;
- link = page_address(page);
+ link = folio_address(folio);
link[res] = '\0';
return 0;
@@ -1624,7 +1625,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct page *page;
+ struct folio *folio;
int err;
err = -EIO;
@@ -1638,20 +1639,20 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry)
goto out_err;
- page = alloc_page(GFP_KERNEL);
+ folio = folio_alloc(GFP_KERNEL, 0);
err = -ENOMEM;
- if (!page)
+ if (!folio)
goto out_err;
- err = fuse_readlink_page(inode, page);
+ err = fuse_readlink_page(inode, folio);
if (err) {
- __free_page(page);
+ folio_put(folio);
goto out_err;
}
- set_delayed_call(callback, page_put_link, page);
+ set_delayed_call(callback, page_put_link, &folio->page);
- return page_address(page);
+ return folio_address(folio);
out_err:
return ERR_PTR(err);
@@ -2028,7 +2029,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
- fuse_get_cache_mask(inode));
+ fuse_get_cache_mask(inode), 0);
oldsize = inode->i_size;
/* see the comment in fuse_change_attributes() */
if (!is_wb || is_truncate)
@@ -2231,7 +2232,7 @@ void fuse_init_dir(struct inode *inode)
static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
- int err = fuse_readlink_page(folio->mapping->host, &folio->page);
+ int err = fuse_readlink_page(folio->mapping->host, folio);
if (!err)
folio_mark_uptodate(folio);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index dafdf766b1d5..88d0946b5bc9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -436,7 +436,7 @@ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from >= curr_index + wpa->ia.ap.num_pages)
+ if (idx_from >= curr_index + wpa->ia.ap.num_folios)
n = n->rb_right;
else if (idx_to < curr_index)
n = n->rb_left;
@@ -483,6 +483,21 @@ static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
}
+static inline bool fuse_folio_is_writeback(struct inode *inode,
+ struct folio *folio)
+{
+ pgoff_t last = folio_next_index(folio) - 1;
+ return fuse_range_is_writeback(inode, folio_index(folio), last);
+}
+
+static void fuse_wait_on_folio_writeback(struct inode *inode,
+ struct folio *folio)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ wait_event(fi->page_waitq, !fuse_folio_is_writeback(inode, folio));
+}
+
/*
* Wait for all pending writepages on the inode to finish.
*
@@ -645,17 +660,20 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_args_pages *ap,
+static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
- for (i = 0; i < ap->num_pages; i++) {
+ for (i = 0; i < ap->num_folios; i++) {
if (should_dirty)
- set_page_dirty_lock(ap->pages[i]);
+ folio_mark_dirty_lock(ap->folios[i]);
if (ap->args.is_pinned)
- unpin_user_page(ap->pages[i]);
+ unpin_folio(ap->folios[i]);
}
+
+ if (nres > 0 && ap->args.invalidate_vmap)
+ invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@@ -725,16 +743,16 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
}
static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
- unsigned int npages)
+ unsigned int nfolios)
{
struct fuse_io_args *ia;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (ia) {
ia->io = io;
- ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
- &ia->ap.descs);
- if (!ia->ap.pages) {
+ ia->ap.folios = fuse_folios_alloc(nfolios, GFP_KERNEL,
+ &ia->ap.descs);
+ if (!ia->ap.folios) {
kfree(ia);
ia = NULL;
}
@@ -744,7 +762,7 @@ static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
static void fuse_io_free(struct fuse_io_args *ia)
{
- kfree(ia->ap.pages);
+ kfree(ia->ap.folios);
kfree(ia);
}
@@ -754,25 +772,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
-
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
- } else if (ia->write.in.size != ia->write.out.size) {
- pos = ia->write.in.offset - io->offset +
- ia->write.out.size;
+ } else {
+ nres = ia->write.out.size;
+ if (ia->write.in.size != ia->write.out.size)
+ pos = ia->write.in.offset - io->offset +
+ ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
+ nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
+ fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
+
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@@ -843,33 +865,33 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
* reached the client fs yet. So the hole is not present there.
*/
if (!fc->writeback_cache) {
- loff_t pos = page_offset(ap->pages[0]) + num_read;
+ loff_t pos = folio_pos(ap->folios[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
}
-static int fuse_do_readpage(struct file *file, struct page *page)
+static int fuse_do_readfolio(struct file *file, struct folio *folio)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct fuse_mount *fm = get_fuse_mount(inode);
- loff_t pos = page_offset(page);
- struct fuse_page_desc desc = { .length = PAGE_SIZE };
+ loff_t pos = folio_pos(folio);
+ struct fuse_folio_desc desc = { .length = PAGE_SIZE };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
- .ap.num_pages = 1,
- .ap.pages = &page,
+ .ap.num_folios = 1,
+ .ap.folios = &folio,
.ap.descs = &desc,
};
ssize_t res;
u64 attr_ver;
/*
- * Page writeback can extend beyond the lifetime of the
- * page-cache page, so make sure we read a properly synced
- * page.
+ * With the temporary pages that are used to complete writeback, we can
+ * have writeback that extends beyond the lifetime of the folio. So
+ * make sure we read a properly synced folio.
*/
- fuse_wait_on_page_writeback(inode, page->index);
+ fuse_wait_on_folio_writeback(inode, folio);
attr_ver = fuse_get_attr_version(fm->fc);
@@ -887,25 +909,24 @@ static int fuse_do_readpage(struct file *file, struct page *page)
if (res < desc.length)
fuse_short_read(inode, attr_ver, res, &ia.ap);
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
return 0;
}
static int fuse_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
int err;
err = -EIO;
if (fuse_is_bad(inode))
goto out;
- err = fuse_do_readpage(file, page);
+ err = fuse_do_readfolio(file, folio);
fuse_invalidate_atime(inode);
out:
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
@@ -919,8 +940,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
size_t num_read = args->out_args[0].size;
struct address_space *mapping = NULL;
- for (i = 0; mapping == NULL && i < ap->num_pages; i++)
- mapping = ap->pages[i]->mapping;
+ for (i = 0; mapping == NULL && i < ap->num_folios; i++)
+ mapping = ap->folios[i]->mapping;
if (mapping) {
struct inode *inode = mapping->host;
@@ -934,12 +955,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_invalidate_atime(inode);
}
- for (i = 0; i < ap->num_pages; i++) {
- struct folio *folio = page_folio(ap->pages[i]);
-
- folio_end_read(folio, !err);
- folio_put(folio);
- }
+ for (i = 0; i < ap->num_folios; i++)
+ folio_end_read(ap->folios[i], !err);
if (ia->ff)
fuse_file_put(ia->ff, false);
@@ -951,8 +968,9 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
- loff_t pos = page_offset(ap->pages[0]);
- size_t count = ap->num_pages << PAGE_SHIFT;
+ loff_t pos = folio_pos(ap->folios[0]);
+ /* Currently, all folios in FUSE are one page */
+ size_t count = ap->num_folios << PAGE_SHIFT;
ssize_t res;
int err;
@@ -963,7 +981,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
/* Don't overflow end offset */
if (pos + (count - 1) == LLONG_MAX) {
count--;
- ap->descs[ap->num_pages - 1].length--;
+ ap->descs[ap->num_folios - 1].length--;
}
WARN_ON((loff_t) (pos + count) < 0);
@@ -985,18 +1003,36 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
- unsigned int i, max_pages, nr_pages = 0;
+ unsigned int max_pages, nr_pages;
+ pgoff_t first = readahead_index(rac);
+ pgoff_t last = first + readahead_count(rac) - 1;
if (fuse_is_bad(inode))
return;
+ wait_event(fi->page_waitq, !fuse_range_is_writeback(inode, first, last));
+
max_pages = min_t(unsigned int, fc->max_pages,
fc->max_read / PAGE_SIZE);
- for (;;) {
+ /*
+ * This is only accurate the first time through, since readahead_folio()
+ * doesn't update readahead_count() from the previous folio until the
+ * next call. Grab nr_pages here so we know how many pages we're going
+ * to have to process. This means that we will exit here with
+ * readahead_count() == folio_nr_pages(last_folio), but we will have
+ * consumed all of the folios, and read_pages() will call
+ * readahead_folio() again which will clean up the rac.
+ */
+ nr_pages = readahead_count(rac);
+
+ while (nr_pages) {
struct fuse_io_args *ia;
struct fuse_args_pages *ap;
+ struct folio *folio;
+ unsigned cur_pages = min(max_pages, nr_pages);
if (fc->num_background >= fc->congestion_threshold &&
rac->ra->async_size >= readahead_count(rac))
@@ -1006,23 +1042,19 @@ static void fuse_readahead(struct readahead_control *rac)
*/
break;
- nr_pages = readahead_count(rac) - nr_pages;
- if (nr_pages > max_pages)
- nr_pages = max_pages;
- if (nr_pages == 0)
- break;
- ia = fuse_io_alloc(NULL, nr_pages);
+ ia = fuse_io_alloc(NULL, cur_pages);
if (!ia)
return;
ap = &ia->ap;
- nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
- for (i = 0; i < nr_pages; i++) {
- fuse_wait_on_page_writeback(inode,
- readahead_index(rac) + i);
- ap->descs[i].length = PAGE_SIZE;
+
+ while (ap->num_folios < cur_pages) {
+ folio = readahead_folio(rac);
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].length = folio_size(folio);
+ ap->num_folios++;
}
- ap->num_pages = nr_pages;
fuse_send_readpages(ia, rac->file);
+ nr_pages -= cur_pages;
}
}
@@ -1139,8 +1171,8 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
bool short_write;
int err;
- for (i = 0; i < ap->num_pages; i++)
- fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
+ for (i = 0; i < ap->num_folios; i++)
+ fuse_wait_on_folio_writeback(inode, ap->folios[i]);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
@@ -1154,24 +1186,24 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
- for (i = 0; i < ap->num_pages; i++) {
- struct page *page = ap->pages[i];
+ for (i = 0; i < ap->num_folios; i++) {
+ struct folio *folio = ap->folios[i];
if (err) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
} else {
- if (count >= PAGE_SIZE - offset)
- count -= PAGE_SIZE - offset;
+ if (count >= folio_size(folio) - offset)
+ count -= folio_size(folio) - offset;
else {
if (short_write)
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
count = 0;
}
offset = 0;
}
- if (ia->write.page_locked && (i == ap->num_pages - 1))
- unlock_page(page);
- put_page(page);
+ if (ia->write.folio_locked && (i == ap->num_folios - 1))
+ folio_unlock(folio);
+ folio_put(folio);
}
return err;
@@ -1185,6 +1217,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
+ unsigned int nr_pages = 0;
size_t count = 0;
int err;
@@ -1193,7 +1226,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
do {
size_t tmp;
- struct page *page;
+ struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
size_t bytes = min_t(size_t, PAGE_SIZE - offset,
iov_iter_count(ii));
@@ -1205,27 +1238,30 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (fault_in_iov_iter_readable(ii, bytes))
break;
- err = -ENOMEM;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
break;
+ }
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
- tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
- flush_dcache_page(page);
+ tmp = copy_folio_from_iter_atomic(folio, offset, bytes, ii);
+ flush_dcache_folio(folio);
if (!tmp) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
err = 0;
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].length = tmp;
- ap->num_pages++;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].length = tmp;
+ ap->num_folios++;
+ nr_pages++;
count += tmp;
pos += tmp;
@@ -1235,18 +1271,18 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
/* If we copied full page, mark it uptodate */
if (tmp == PAGE_SIZE)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
- if (PageUptodate(page)) {
- unlock_page(page);
+ if (folio_test_uptodate(folio)) {
+ folio_unlock(folio);
} else {
- ia->write.page_locked = true;
+ ia->write.folio_locked = true;
break;
}
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
- ap->num_pages < max_pages && offset == 0);
+ nr_pages < max_pages && offset == 0);
return count > 0 ? count : err;
}
@@ -1280,8 +1316,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
- ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
- if (!ap->pages) {
+ ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs);
+ if (!ap->folios) {
err = -ENOMEM;
break;
}
@@ -1303,7 +1339,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
err = -EIO;
}
}
- kfree(ap->pages);
+ kfree(ap->folios);
} while (!err && iov_iter_count(ii));
fuse_write_update_attr(inode, pos, res);
@@ -1430,11 +1466,7 @@ writethrough:
task_io_account_write(count);
- err = file_remove_privs(file);
- if (err)
- goto out;
-
- err = file_update_time(file);
+ err = kiocb_modified(iocb);
if (err)
goto out;
@@ -1468,35 +1500,57 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
- unsigned int max_pages)
+ unsigned int max_pages,
+ bool use_pages_for_kvec_io)
{
+ bool flush_or_invalidate = false;
+ unsigned int nr_pages = 0;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
- /* Special case for kernel I/O: can copy directly into the buffer */
+ /* Special case for kernel I/O: can copy directly into the buffer.
+ * However if the implementation of fuse_conn requires pages instead of
+ * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
+ */
if (iov_iter_is_kvec(ii)) {
- unsigned long user_addr = fuse_get_user_addr(ii);
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+ void *user_addr = (void *)fuse_get_user_addr(ii);
- if (write)
- ap->args.in_args[1].value = (void *) user_addr;
- else
- ap->args.out_args[0].value = (void *) user_addr;
+ if (!use_pages_for_kvec_io) {
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
- iov_iter_advance(ii, frag_size);
- *nbytesp = frag_size;
- return 0;
+ if (write)
+ ap->args.in_args[1].value = user_addr;
+ else
+ ap->args.out_args[0].value = user_addr;
+
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
+ return 0;
+ }
+
+ if (is_vmalloc_addr(user_addr)) {
+ ap->args.vmap_base = user_addr;
+ flush_or_invalidate = true;
+ }
}
- while (nbytes < *nbytesp && ap->num_pages < max_pages) {
- unsigned npages;
+ /*
+ * Until there is support for iov_iter_extract_folios(), we have to
+ * manually extract pages using iov_iter_extract_pages() and then
+ * copy that to a folios array.
+ */
+ struct page **pages = kzalloc(max_pages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ while (nbytes < *nbytesp && nr_pages < max_pages) {
+ unsigned nfolios, i;
size_t start;
- struct page **pt_pages;
- pt_pages = &ap->pages[ap->num_pages];
- ret = iov_iter_extract_pages(ii, &pt_pages,
+ ret = iov_iter_extract_pages(ii, &pages,
*nbytesp - nbytes,
- max_pages - ap->num_pages,
+ max_pages - nr_pages,
0, &start);
if (ret < 0)
break;
@@ -1504,16 +1558,25 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
nbytes += ret;
ret += start;
- npages = DIV_ROUND_UP(ret, PAGE_SIZE);
+ /* Currently, all folios in FUSE are one page */
+ nfolios = DIV_ROUND_UP(ret, PAGE_SIZE);
- ap->descs[ap->num_pages].offset = start;
- fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
+ ap->descs[ap->num_folios].offset = start;
+ fuse_folio_descs_length_init(ap->descs, ap->num_folios, nfolios);
+ for (i = 0; i < nfolios; i++)
+ ap->folios[i + ap->num_folios] = page_folio(pages[i]);
- ap->num_pages += npages;
- ap->descs[ap->num_pages - 1].length -=
+ ap->num_folios += nfolios;
+ ap->descs[ap->num_folios - 1].length -=
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
+ nr_pages += nfolios;
}
+ kfree(pages);
+
+ if (write && flush_or_invalidate)
+ flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
+ ap->args.invalidate_vmap = !write && flush_or_invalidate;
ap->args.is_pinned = iov_iter_extract_will_pin(ii);
ap->args.user_pages = true;
if (write)
@@ -1582,7 +1645,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
- max_pages);
+ max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@@ -1596,7 +1659,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
@@ -1650,7 +1713,7 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t res;
- if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
+ if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, to);
} else {
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
@@ -1664,7 +1727,6 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
ssize_t res;
bool exclusive;
@@ -1672,9 +1734,11 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
res = generic_write_checks(iocb, from);
if (res > 0) {
task_io_account_write(res);
- if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
+ if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, from);
} else {
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+
res = fuse_direct_io(&io, from, &iocb->ki_pos,
FUSE_DIO_WRITE);
fuse_write_update_attr(inode, iocb->ki_pos, res);
@@ -1760,21 +1824,21 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- for (i = 0; i < ap->num_pages; i++)
- __free_page(ap->pages[i]);
+ for (i = 0; i < ap->num_folios; i++)
+ folio_put(ap->folios[i]);
fuse_file_put(wpa->ia.ff, false);
- kfree(ap->pages);
+ kfree(ap->folios);
kfree(wpa);
}
-static void fuse_writepage_finish_stat(struct inode *inode, struct page *page)
+static void fuse_writepage_finish_stat(struct inode *inode, struct folio *folio)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(page, NR_WRITEBACK_TEMP);
+ node_stat_sub_folio(folio, NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
@@ -1785,8 +1849,8 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
struct fuse_inode *fi = get_fuse_inode(inode);
int i;
- for (i = 0; i < ap->num_pages; i++)
- fuse_writepage_finish_stat(inode, ap->pages[i]);
+ for (i = 0; i < ap->num_folios; i++)
+ fuse_writepage_finish_stat(inode, ap->folios[i]);
wake_up(&fi->page_waitq);
}
@@ -1801,7 +1865,8 @@ __acquires(fi->lock)
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_args *args = &wpa->ia.ap.args;
- __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
+ /* Currently, all folios in FUSE are one page */
+ __u64 data_size = wpa->ia.ap.num_folios * PAGE_SIZE;
int err;
fi->writectr++;
@@ -1841,7 +1906,8 @@ __acquires(fi->lock)
for (aux = wpa->next; aux; aux = next) {
next = aux->next;
aux->next = NULL;
- fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]);
+ fuse_writepage_finish_stat(aux->inode,
+ aux->ia.ap.folios[0]);
fuse_writepage_free(aux);
}
@@ -1876,11 +1942,11 @@ static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
struct fuse_writepage_args *wpa)
{
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
- pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
+ pgoff_t idx_to = idx_from + wpa->ia.ap.num_folios - 1;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
- WARN_ON(!wpa->ia.ap.num_pages);
+ WARN_ON(!wpa->ia.ap.num_folios);
while (*p) {
struct fuse_writepage_args *curr;
pgoff_t curr_index;
@@ -1891,7 +1957,7 @@ static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
WARN_ON(curr->inode != wpa->inode);
curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from >= curr_index + curr->ia.ap.num_pages)
+ if (idx_from >= curr_index + curr->ia.ap.num_folios)
p = &(*p)->rb_right;
else if (idx_to < curr_index)
p = &(*p)->rb_left;
@@ -2023,9 +2089,9 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
if (wpa) {
ap = &wpa->ia.ap;
- ap->num_pages = 0;
- ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
- if (!ap->pages) {
+ ap->num_folios = 0;
+ ap->folios = fuse_folios_alloc(1, GFP_NOFS, &ap->descs);
+ if (!ap->folios) {
kfree(wpa);
wpa = NULL;
}
@@ -2049,19 +2115,19 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
}
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
- struct folio *tmp_folio, uint32_t page_index)
+ struct folio *tmp_folio, uint32_t folio_index)
{
struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
folio_copy(tmp_folio, folio);
- ap->pages[page_index] = &tmp_folio->page;
- ap->descs[page_index].offset = 0;
- ap->descs[page_index].length = PAGE_SIZE;
+ ap->folios[folio_index] = tmp_folio;
+ ap->descs[folio_index].offset = 0;
+ ap->descs[folio_index].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP);
+ node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
@@ -2115,7 +2181,7 @@ static int fuse_writepage_locked(struct folio *folio)
goto err_writepage_args;
ap = &wpa->ia.ap;
- ap->num_pages = 1;
+ ap->num_folios = 1;
folio_start_writeback(folio);
fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
@@ -2143,32 +2209,32 @@ struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
struct inode *inode;
- struct page **orig_pages;
- unsigned int max_pages;
+ struct folio **orig_folios;
+ unsigned int max_folios;
};
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
{
struct fuse_args_pages *ap = &data->wpa->ia.ap;
struct fuse_conn *fc = get_fuse_conn(data->inode);
- struct page **pages;
- struct fuse_page_desc *descs;
- unsigned int npages = min_t(unsigned int,
- max_t(unsigned int, data->max_pages * 2,
- FUSE_DEFAULT_MAX_PAGES_PER_REQ),
+ struct folio **folios;
+ struct fuse_folio_desc *descs;
+ unsigned int nfolios = min_t(unsigned int,
+ max_t(unsigned int, data->max_folios * 2,
+ FUSE_DEFAULT_MAX_PAGES_PER_REQ),
fc->max_pages);
- WARN_ON(npages <= data->max_pages);
+ WARN_ON(nfolios <= data->max_folios);
- pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
- if (!pages)
+ folios = fuse_folios_alloc(nfolios, GFP_NOFS, &descs);
+ if (!folios)
return false;
- memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
- memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
- kfree(ap->pages);
- ap->pages = pages;
+ memcpy(folios, ap->folios, sizeof(struct folio *) * ap->num_folios);
+ memcpy(descs, ap->descs, sizeof(struct fuse_folio_desc) * ap->num_folios);
+ kfree(ap->folios);
+ ap->folios = folios;
ap->descs = descs;
- data->max_pages = npages;
+ data->max_folios = nfolios;
return true;
}
@@ -2178,7 +2244,7 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
struct fuse_writepage_args *wpa = data->wpa;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- int num_pages = wpa->ia.ap.num_pages;
+ int num_folios = wpa->ia.ap.num_folios;
int i;
spin_lock(&fi->lock);
@@ -2186,8 +2252,8 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- for (i = 0; i < num_pages; i++)
- end_page_writeback(data->orig_pages[i]);
+ for (i = 0; i < num_folios; i++)
+ folio_end_writeback(data->orig_folios[i]);
}
/*
@@ -2198,15 +2264,15 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
* swapping the new temp page with the old one.
*/
static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
- struct page *page)
+ struct folio *folio)
{
struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
struct fuse_writepage_args *tmp;
struct fuse_writepage_args *old_wpa;
struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
- WARN_ON(new_ap->num_pages != 0);
- new_ap->num_pages = 1;
+ WARN_ON(new_ap->num_folios != 0);
+ new_ap->num_folios = 1;
spin_lock(&fi->lock);
old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
@@ -2220,9 +2286,9 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
WARN_ON(tmp->inode != new_wpa->inode);
curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
- if (curr_index == page->index) {
- WARN_ON(tmp->ia.ap.num_pages != 1);
- swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
+ if (curr_index == folio->index) {
+ WARN_ON(tmp->ia.ap.num_folios != 1);
+ swap(tmp->ia.ap.folios[0], new_ap->folios[0]);
break;
}
}
@@ -2235,18 +2301,19 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
spin_unlock(&fi->lock);
if (tmp) {
- fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]);
+ fuse_writepage_finish_stat(new_wpa->inode,
+ folio);
fuse_writepage_free(new_wpa);
}
return false;
}
-static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
+static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
struct fuse_args_pages *ap,
struct fuse_fill_wb_data *data)
{
- WARN_ON(!ap->num_pages);
+ WARN_ON(!ap->num_folios);
/*
* Being under writeback is unlikely but possible. For example direct
@@ -2254,23 +2321,23 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
* the pages are faulted with get_user_pages(), and then after the read
* completed.
*/
- if (fuse_page_is_writeback(data->inode, page->index))
+ if (fuse_folio_is_writeback(data->inode, folio))
return true;
/* Reached max pages */
- if (ap->num_pages == fc->max_pages)
+ if (ap->num_folios == fc->max_pages)
return true;
/* Reached max write bytes */
- if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
+ if ((ap->num_folios + 1) * PAGE_SIZE > fc->max_write)
return true;
/* Discontinuity */
- if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
+ if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio))
return true;
/* Need to grow the pages array? If so, did the expansion fail? */
- if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
+ if (ap->num_folios == data->max_folios && !fuse_pages_realloc(data))
return true;
return false;
@@ -2295,7 +2362,7 @@ static int fuse_writepages_fill(struct folio *folio,
goto out_unlock;
}
- if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
+ if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
}
@@ -2314,7 +2381,7 @@ static int fuse_writepages_fill(struct folio *folio,
* This is ensured by holding the page lock in page_mkwrite() while
* checking fuse_page_is_writeback(). We already hold the page lock
* since clear_page_dirty_for_io() and keep it held until we add the
- * request to the fi->writepages list and increment ap->num_pages.
+ * request to the fi->writepages list and increment ap->num_folios.
* After this fuse_page_is_writeback() will indicate that the page is
* under writeback, so we can release the page lock.
*/
@@ -2326,13 +2393,13 @@ static int fuse_writepages_fill(struct folio *folio,
goto out_unlock;
}
fuse_file_get(wpa->ia.ff);
- data->max_pages = 1;
+ data->max_folios = 1;
ap = &wpa->ia.ap;
}
folio_start_writeback(folio);
- fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages);
- data->orig_pages[ap->num_pages] = &folio->page;
+ fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_folios);
+ data->orig_folios[ap->num_folios] = folio;
err = 0;
if (data->wpa) {
@@ -2341,9 +2408,9 @@ static int fuse_writepages_fill(struct folio *folio,
* fuse_page_is_writeback().
*/
spin_lock(&fi->lock);
- ap->num_pages++;
+ ap->num_folios++;
spin_unlock(&fi->lock);
- } else if (fuse_writepage_add(wpa, &folio->page)) {
+ } else if (fuse_writepage_add(wpa, folio)) {
data->wpa = wpa;
} else {
folio_end_writeback(folio);
@@ -2375,21 +2442,21 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
- data.orig_pages = kcalloc(fc->max_pages,
- sizeof(struct page *),
- GFP_NOFS);
- if (!data.orig_pages)
+ data.orig_folios = kcalloc(fc->max_pages,
+ sizeof(struct folio *),
+ GFP_NOFS);
+ if (!data.orig_folios)
goto out;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
- WARN_ON(!data.wpa->ia.ap.num_pages);
+ WARN_ON(!data.wpa->ia.ap.num_folios);
fuse_writepages_send(&data);
}
if (data.ff)
fuse_file_put(data.ff, false);
- kfree(data.orig_pages);
+ kfree(data.orig_folios);
out:
return err;
}
@@ -2429,7 +2496,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
folio_zero_segment(folio, 0, off);
goto success;
}
- err = fuse_do_readpage(file, &folio->page);
+ err = fuse_do_readfolio(file, folio);
if (err)
goto cleanup;
success:
@@ -2518,17 +2585,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
*/
static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
file_update_time(vmf->vma->vm_file);
- lock_page(page);
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != inode->i_mapping) {
+ folio_unlock(folio);
return VM_FAULT_NOPAGE;
}
- fuse_wait_on_page_writeback(inode, page->index);
+ fuse_wait_on_folio_writeback(inode, folio);
return VM_FAULT_LOCKED;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e6cc3d552b13..74744c6f2860 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -35,9 +35,6 @@
/** Default max number of pages that can be used in a single read request */
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
-/** Maximum of max_pages received in init_out */
-#define FUSE_MAX_MAX_PAGES 256
-
/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN
@@ -47,6 +44,9 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
+/** Maximum of max_pages received in init_out */
+extern unsigned int fuse_max_pages_limit;
+
/** List of active connections */
extern struct list_head fuse_conn_list;
@@ -285,8 +285,8 @@ struct fuse_arg {
void *value;
};
-/** FUSE page descriptor */
-struct fuse_page_desc {
+/** FUSE folio descriptor */
+struct fuse_folio_desc {
unsigned int length;
unsigned int offset;
};
@@ -309,16 +309,19 @@ struct fuse_args {
bool may_block:1;
bool is_ext:1;
bool is_pinned:1;
+ bool invalidate_vmap:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
+ /* Used for kvec iter backed by vmalloc address */
+ void *vmap_base;
};
struct fuse_args_pages {
struct fuse_args args;
- struct page **pages;
- struct fuse_page_desc *descs;
- unsigned int num_pages;
+ struct folio **folios;
+ struct fuse_folio_desc *descs;
+ unsigned int num_folios;
};
struct fuse_release_args {
@@ -857,6 +860,9 @@ struct fuse_conn {
/** Passthrough support for read/write IO */
unsigned int passthrough:1;
+ /* Use pages instead of pointer for kernel I/O */
+ unsigned int use_pages_for_kvec_io:1;
+
/** Maximum stack depth for passthrough backing files */
int max_stack_depth;
@@ -884,6 +890,9 @@ struct fuse_conn {
/** Version counter for attribute changes */
atomic64_t attr_version;
+ /** Version counter for evict inode */
+ atomic64_t evict_ctr;
+
/** Called on final put */
void (*release)(struct fuse_conn *);
@@ -978,6 +987,11 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
return atomic64_read(&fc->attr_version);
}
+static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
+{
+ return atomic64_read(&fc->evict_ctr);
+}
+
static inline bool fuse_stale_inode(const struct inode *inode, int generation,
struct fuse_attr *attr)
{
@@ -995,25 +1009,25 @@ static inline bool fuse_is_bad(struct inode *inode)
return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
}
-static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
- struct fuse_page_desc **desc)
+static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags,
+ struct fuse_folio_desc **desc)
{
- struct page **pages;
+ struct folio **folios;
- pages = kzalloc(npages * (sizeof(struct page *) +
- sizeof(struct fuse_page_desc)), flags);
- *desc = (void *) (pages + npages);
+ folios = kzalloc(nfolios * (sizeof(struct folio *) +
+ sizeof(struct fuse_folio_desc)), flags);
+ *desc = (void *) (folios + nfolios);
- return pages;
+ return folios;
}
-static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
- unsigned int index,
- unsigned int nr_pages)
+static inline void fuse_folio_descs_length_init(struct fuse_folio_desc *descs,
+ unsigned int index,
+ unsigned int nr_folios)
{
int i;
- for (i = index; i < index + nr_pages; i++)
+ for (i = index; i < index + nr_folios; i++)
descs[i].length = PAGE_SIZE - descs[i].offset;
}
@@ -1037,7 +1051,8 @@ extern const struct dentry_operations fuse_root_dentry_operations;
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version);
+ u64 attr_valid, u64 attr_version,
+ u64 evict_ctr);
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode);
@@ -1062,7 +1077,7 @@ struct fuse_io_args {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
- bool page_locked;
+ bool folio_locked;
} write;
};
struct fuse_args_pages ap;
@@ -1127,7 +1142,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
- u64 attr_valid, u32 cache_mask);
+ u64 attr_valid, u32 cache_mask,
+ u64 evict_ctr);
u32 fuse_get_cache_mask(struct inode *inode);
@@ -1480,4 +1496,12 @@ ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
+#ifdef CONFIG_SYSCTL
+extern int fuse_sysctl_register(void);
+extern void fuse_sysctl_unregister(void);
+#else
+#define fuse_sysctl_register() (0)
+#define fuse_sysctl_unregister() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd3321e29a3e..3ce4f4e81d09 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -35,6 +35,8 @@ DEFINE_MUTEX(fuse_mutex);
static int set_global_limit(const char *val, const struct kernel_param *kp);
+unsigned int fuse_max_pages_limit = 256;
+
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
&max_user_bgreq, 0644);
@@ -173,6 +175,14 @@ static void fuse_evict_inode(struct inode *inode)
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
}
+ /*
+ * Evict of non-deleted inode may race with outstanding
+ * LOOKUP/READDIRPLUS requests and result in inconsistency when
+ * the request finishes. Deal with that here by bumping a
+ * counter that can be compared to the starting value.
+ */
+ if (inode->i_nlink > 0)
+ atomic64_inc(&fc->evict_ctr);
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(fi->iocachectr != 0);
@@ -206,17 +216,30 @@ static ino_t fuse_squash_ino(u64 ino64)
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
- u64 attr_valid, u32 cache_mask)
+ u64 attr_valid, u32 cache_mask,
+ u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
lockdep_assert_held(&fi->lock);
+ /*
+ * Clear basic stats from invalid mask.
+ *
+ * Don't do this if this is coming from a fuse_iget() call and there
+ * might have been a racing evict which would've invalidated the result
+ * if the attr_version would've been preserved.
+ *
+ * !evict_ctr -> this is create
+ * fi->attr_version != 0 -> this is not a new inode
+ * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
+ */
+ if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
+ set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
+
fi->attr_version = atomic64_inc_return(&fc->attr_version);
fi->i_time = attr_valid;
- /* Clear basic stats from invalid mask */
- set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -295,9 +318,9 @@ u32 fuse_get_cache_mask(struct inode *inode)
return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
- struct fuse_statx *sx,
- u64 attr_valid, u64 attr_version)
+static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx, u64 attr_valid,
+ u64 attr_version, u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -331,7 +354,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
}
old_mtime = inode_get_mtime(inode);
- fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
+ fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
+ evict_ctr);
oldsize = inode->i_size;
/*
@@ -372,6 +396,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx, u64 attr_valid,
+ u64 attr_version)
+{
+ fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
+}
+
static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
u64 nodeid)
{
@@ -426,7 +457,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version)
+ u64 attr_valid, u64 attr_version,
+ u64 evict_ctr)
{
struct inode *inode;
struct fuse_inode *fi;
@@ -487,8 +519,8 @@ retry:
fi->nlookup++;
spin_unlock(&fi->lock);
done:
- fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
-
+ fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
+ evict_ctr);
return inode;
}
@@ -940,11 +972,12 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->initialized = 0;
fc->connected = 1;
atomic64_set(&fc->attr_version, 1);
+ atomic64_set(&fc->evict_ctr, 1);
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
- fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
+ fc->max_pages_limit = fuse_max_pages_limit;
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
fuse_backing_files_init(fc);
@@ -1001,7 +1034,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
- return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
+ return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}
struct fuse_inode_handle {
@@ -1610,7 +1643,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
- root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
+ root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
+ fuse_get_evict_ctr(fm->fc));
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does
@@ -2063,8 +2097,14 @@ static int __init fuse_fs_init(void)
if (err)
goto out3;
+ err = fuse_sysctl_register();
+ if (err)
+ goto out4;
+
return 0;
+ out4:
+ unregister_filesystem(&fuse_fs_type);
out3:
unregister_fuseblk();
out2:
@@ -2075,6 +2115,7 @@ static int __init fuse_fs_init(void)
static void fuse_fs_cleanup(void)
{
+ fuse_sysctl_unregister();
unregister_filesystem(&fuse_fs_type);
unregister_fuseblk();
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index 572ce8a82ceb..2d9abf48828f 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -10,6 +10,8 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
+#define FUSE_VERITY_ENABLE_ARG_MAX_PAGES 256
+
static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_ioctl_out *outarg)
{
@@ -140,7 +142,7 @@ static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov,
{
struct fsverity_enable_arg enable;
struct fsverity_enable_arg __user *uarg = (void __user *)arg;
- const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE;
+ const __u32 max_buffer_len = FUSE_VERITY_ENABLE_ARG_MAX_PAGES * PAGE_SIZE;
if (copy_from_user(&enable, uarg, sizeof(enable)))
return -EFAULT;
@@ -249,12 +251,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
- ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
+ ap.folios = fuse_folios_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
- if (!ap.pages || !iov_page)
+ if (!ap.folios || !iov_page)
goto out;
- fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
+ fuse_folio_descs_length_init(ap.descs, 0, fm->fc->max_pages);
/*
* If restricted, initialize IO parameters as encoded in @cmd.
@@ -304,14 +306,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -ENOMEM;
if (max_pages > fm->fc->max_pages)
goto out;
- while (ap.num_pages < max_pages) {
- ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (!ap.pages[ap.num_pages])
+ while (ap.num_folios < max_pages) {
+ ap.folios[ap.num_folios] = folio_alloc(GFP_KERNEL | __GFP_HIGHMEM, 0);
+ if (!ap.folios[ap.num_folios])
goto out;
- ap.num_pages++;
+ ap.num_folios++;
}
-
/* okay, let's send it to the client */
ap.args.opcode = FUSE_IOCTL;
ap.args.nodeid = ff->nodeid;
@@ -325,8 +326,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size);
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
- c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
+ c = copy_folio_from_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
@@ -364,7 +365,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
- vaddr = kmap_local_page(ap.pages[0]);
+ vaddr = kmap_local_folio(ap.folios[0], 0);
err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0);
@@ -392,17 +393,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred);
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
- c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
+ c = copy_folio_to_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
err = 0;
out:
free_page((unsigned long) iov_page);
- while (ap.num_pages)
- __free_page(ap.pages[--ap.num_pages]);
- kfree(ap.pages);
+ while (ap.num_folios)
+ folio_put(ap.folios[--ap.num_folios]);
+ kfree(ap.folios);
return err ? err : outarg.result;
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 0377b6dc24c8..17ce9636a2b1 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
- u64 attr_version)
+ u64 attr_version, u64 evict_ctr)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
@@ -233,7 +233,7 @@ retry:
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, ATTR_TIMEOUT(o),
- attr_version);
+ attr_version, evict_ctr);
if (!inode)
inode = ERR_PTR(-ENOMEM);
@@ -284,7 +284,8 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
- struct dir_context *ctx, u64 attr_version)
+ struct dir_context *ctx, u64 attr_version,
+ u64 evict_ctr)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
@@ -319,7 +320,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
buf += reclen;
nbytes -= reclen;
- ret = fuse_direntplus_link(file, direntplus, attr_version);
+ ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
@@ -331,26 +332,27 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus;
ssize_t res;
- struct page *page;
+ struct folio *folio;
struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
- struct fuse_page_desc desc = { .length = PAGE_SIZE };
- u64 attr_version = 0;
+ struct fuse_folio_desc desc = { .length = PAGE_SIZE };
+ u64 attr_version = 0, evict_ctr = 0;
bool locked;
- page = alloc_page(GFP_KERNEL);
- if (!page)
+ folio = folio_alloc(GFP_KERNEL, 0);
+ if (!folio)
return -ENOMEM;
plus = fuse_use_readdirplus(inode, ctx);
ap->args.out_pages = true;
- ap->num_pages = 1;
- ap->pages = &page;
+ ap->num_folios = 1;
+ ap->folios = &folio;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
+ evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
@@ -367,15 +369,16 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
- res = parse_dirplusfile(page_address(page), res,
- file, ctx, attr_version);
+ res = parse_dirplusfile(folio_address(folio), res,
+ file, ctx, attr_version,
+ evict_ctr);
} else {
- res = parse_dirfile(page_address(page), res, file,
+ res = parse_dirfile(folio_address(folio), res, file,
ctx);
}
}
- __free_page(page);
+ folio_put(folio);
fuse_invalidate_atime(inode);
return res;
}
diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c
new file mode 100644
index 000000000000..b272bb333005
--- /dev/null
+++ b/fs/fuse/sysctl.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/fs/fuse/fuse_sysctl.c
+ *
+ * Sysctl interface to fuse parameters
+ */
+#include <linux/sysctl.h>
+
+#include "fuse_i.h"
+
+static struct ctl_table_header *fuse_table_header;
+
+/* Bound by fuse_init_out max_pages, which is a u16 */
+static unsigned int sysctl_fuse_max_pages_limit = 65535;
+
+static struct ctl_table fuse_sysctl_table[] = {
+ {
+ .procname = "max_pages_limit",
+ .data = &fuse_max_pages_limit,
+ .maxlen = sizeof(fuse_max_pages_limit),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &sysctl_fuse_max_pages_limit,
+ },
+};
+
+int fuse_sysctl_register(void)
+{
+ fuse_table_header = register_sysctl("fs/fuse", fuse_sysctl_table);
+ if (!fuse_table_header)
+ return -ENOMEM;
+ return 0;
+}
+
+void fuse_sysctl_unregister(void)
+{
+ unregister_sysctl_table(fuse_table_header);
+ fuse_table_header = NULL;
+}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 6404a189e989..d88d3fc5306a 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -97,7 +97,8 @@ struct virtio_fs_req_work {
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req, bool in_flight);
+ struct fuse_req *req, bool in_flight,
+ gfp_t gfp);
static const struct constant_table dax_param_enums[] = {
{"always", FUSE_DAX_ALWAYS },
@@ -575,6 +576,8 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
/* Dispatch pending requests */
while (1) {
+ unsigned int flags;
+
spin_lock(&fsvq->lock);
req = list_first_entry_or_null(&fsvq->queued_reqs,
struct fuse_req, list);
@@ -585,7 +588,9 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list);
spin_unlock(&fsvq->lock);
- ret = virtio_fs_enqueue_req(fsvq, req, true);
+ flags = memalloc_nofs_save();
+ ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL);
+ memalloc_nofs_restore(flags);
if (ret < 0) {
if (ret == -ENOSPC) {
spin_lock(&fsvq->lock);
@@ -686,7 +691,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
}
/* Allocate and copy args into req->argbuf */
-static int copy_args_to_argbuf(struct fuse_req *req)
+static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
{
struct fuse_args *args = req->args;
unsigned int offset = 0;
@@ -700,7 +705,7 @@ static int copy_args_to_argbuf(struct fuse_req *req)
len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
fuse_len_args(num_out, args->out_args);
- req->argbuf = kmalloc(len, GFP_ATOMIC);
+ req->argbuf = kmalloc(len, gfp);
if (!req->argbuf)
return -ENOMEM;
@@ -760,7 +765,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
- struct page *page;
+ struct folio *folio;
/*
* TODO verify that server properly follows FUSE protocol
@@ -772,12 +777,12 @@ static void virtio_fs_request_complete(struct fuse_req *req,
if (args->out_pages && args->page_zeroing) {
len = args->out_args[args->out_numargs - 1].size;
ap = container_of(args, typeof(*ap), args);
- for (i = 0; i < ap->num_pages; i++) {
+ for (i = 0; i < ap->num_folios; i++) {
thislen = ap->descs[i].length;
if (len < thislen) {
WARN_ON(ap->descs[i].offset);
- page = ap->pages[i];
- zero_user_segment(page, len, thislen);
+ folio = ap->folios[i];
+ folio_zero_segment(folio, len, thislen);
len = 0;
} else {
len -= thislen;
@@ -1267,15 +1272,15 @@ static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *r
}
/* Count number of scatter-gather elements required */
-static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
- unsigned int num_pages,
- unsigned int total_len)
+static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs,
+ unsigned int num_folios,
+ unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
- for (i = 0; i < num_pages && total_len; i++) {
- this_len = min(page_descs[i].length, total_len);
+ for (i = 0; i < num_folios && total_len; i++) {
+ this_len = min(folio_descs[i].length, total_len);
total_len -= this_len;
}
@@ -1294,8 +1299,8 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->in_pages) {
size = args->in_args[args->in_numargs - 1].size;
- total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
- size);
+ total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
+ size);
}
if (!test_bit(FR_ISREPLY, &req->flags))
@@ -1308,27 +1313,27 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->out_pages) {
size = args->out_args[args->out_numargs - 1].size;
- total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
- size);
+ total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
+ size);
}
return total_sgs;
}
-/* Add pages to scatter-gather list and return number of elements used */
-static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
- struct page **pages,
- struct fuse_page_desc *page_descs,
- unsigned int num_pages,
- unsigned int total_len)
+/* Add folios to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_folios(struct scatterlist *sg,
+ struct folio **folios,
+ struct fuse_folio_desc *folio_descs,
+ unsigned int num_folios,
+ unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
- for (i = 0; i < num_pages && total_len; i++) {
+ for (i = 0; i < num_folios && total_len; i++) {
sg_init_table(&sg[i], 1);
- this_len = min(page_descs[i].length, total_len);
- sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
+ this_len = min(folio_descs[i].length, total_len);
+ sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
total_len -= this_len;
}
@@ -1353,10 +1358,10 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
sg_init_one(&sg[total_sgs++], argbuf, len);
if (argpages)
- total_sgs += sg_init_fuse_pages(&sg[total_sgs],
- ap->pages, ap->descs,
- ap->num_pages,
- args[numargs - 1].size);
+ total_sgs += sg_init_fuse_folios(&sg[total_sgs],
+ ap->folios, ap->descs,
+ ap->num_folios,
+ args[numargs - 1].size);
if (len_used)
*len_used = len;
@@ -1366,7 +1371,8 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req, bool in_flight)
+ struct fuse_req *req, bool in_flight,
+ gfp_t gfp)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@ -1387,8 +1393,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
if (total_sgs > ARRAY_SIZE(stack_sgs)) {
- sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
- sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
+ sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
+ sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
if (!sgs || !sg) {
ret = -ENOMEM;
goto out;
@@ -1396,7 +1402,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
}
/* Use a bounce buffer since stack args cannot be mapped */
- ret = copy_args_to_argbuf(req);
+ ret = copy_args_to_argbuf(req, gfp);
if (ret < 0)
goto out;
@@ -1490,7 +1496,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
queue_id);
fsvq = &fs->vqs[queue_id];
- ret = virtio_fs_enqueue_req(fsvq, req, false);
+ ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
if (ret < 0) {
if (ret == -ENOSPC) {
/*
@@ -1691,6 +1697,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->delete_stale = true;
fc->auto_submounts = true;
fc->sync_fs = true;
+ fc->use_pages_for_kvec_io = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2bbf73eb53e7..c39c4945946c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2550,6 +2550,7 @@ struct kvec;
struct page *get_dump_page(unsigned long addr);
bool folio_mark_dirty(struct folio *folio);
+bool folio_mark_dirty_lock(struct folio *folio);
bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 80746182e9e8..1d1832e2a599 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -52,6 +52,12 @@ bool set_page_dirty(struct page *page)
}
EXPORT_SYMBOL(set_page_dirty);
+int set_page_dirty_lock(struct page *page)
+{
+ return folio_mark_dirty_lock(page_folio(page));
+}
+EXPORT_SYMBOL(set_page_dirty_lock);
+
bool clear_page_dirty_for_io(struct page *page)
{
return folio_clear_dirty_for_io(page_folio(page));
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index fdb89ce85fff..d213ead95675 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2925,25 +2925,25 @@ bool folio_mark_dirty(struct folio *folio)
EXPORT_SYMBOL(folio_mark_dirty);
/*
- * set_page_dirty() is racy if the caller has no reference against
- * page->mapping->host, and if the page is unlocked. This is because another
- * CPU could truncate the page off the mapping and then free the mapping.
+ * folio_mark_dirty() is racy if the caller has no reference against
+ * folio->mapping->host, and if the folio is unlocked. This is because another
+ * CPU could truncate the folio off the mapping and then free the mapping.
*
- * Usually, the page _is_ locked, or the caller is a user-space process which
+ * Usually, the folio _is_ locked, or the caller is a user-space process which
* holds a reference on the inode by having an open file.
*
- * In other cases, the page should be locked before running set_page_dirty().
+ * In other cases, the folio should be locked before running folio_mark_dirty().
*/
-int set_page_dirty_lock(struct page *page)
+bool folio_mark_dirty_lock(struct folio *folio)
{
- int ret;
+ bool ret;
- lock_page(page);
- ret = set_page_dirty(page);
- unlock_page(page);
+ folio_lock(folio);
+ ret = folio_mark_dirty(folio);
+ folio_unlock(folio);
return ret;
}
-EXPORT_SYMBOL(set_page_dirty_lock);
+EXPORT_SYMBOL(folio_mark_dirty_lock);
/*
* This cancels just the dirty bit on the kernel page itself, it does NOT