summaryrefslogtreecommitdiff
path: root/fs/fuse/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse/dev.c')
-rw-r--r--fs/fuse/dev.c669
1 files changed, 443 insertions, 226 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51e31df4c546..5dda7080f4a9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -23,8 +23,8 @@
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
-#define CREATE_TRACE_POINTS
#include "fuse_trace.h"
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
@@ -32,6 +32,100 @@ MODULE_ALIAS("devname:fuse");
static struct kmem_cache *fuse_req_cachep;
+const unsigned long fuse_timeout_timer_freq =
+ secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ);
+
+bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
+{
+ struct fuse_req *req;
+
+ req = list_first_entry_or_null(list, struct fuse_req, list);
+ if (!req)
+ return false;
+ return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
+}
+
+static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
+{
+ int i;
+
+ for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+ if (fuse_request_expired(fc, &processing[i]))
+ return true;
+
+ return false;
+}
+
+/*
+ * Check if any requests aren't being completed by the time the request timeout
+ * elapses. To do so, we:
+ * - check the fiq pending list
+ * - check the bg queue
+ * - check the fpq io and processing lists
+ *
+ * To make this fast, we only check against the head request on each list since
+ * these are generally queued in order of creation time (eg newer requests get
+ * queued to the tail). We might miss a few edge cases (eg requests transitioning
+ * between lists, re-sent requests at the head of the pending list having a
+ * later creation time than other requests on that list, etc.) but that is fine
+ * since if the request never gets fulfilled, it will eventually be caught.
+ */
+void fuse_check_timeout(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
+ timeout.work);
+ struct fuse_iqueue *fiq = &fc->iq;
+ struct fuse_dev *fud;
+ struct fuse_pqueue *fpq;
+ bool expired = false;
+
+ if (!atomic_read(&fc->num_waiting))
+ goto out;
+
+ spin_lock(&fiq->lock);
+ expired = fuse_request_expired(fc, &fiq->pending);
+ spin_unlock(&fiq->lock);
+ if (expired)
+ goto abort_conn;
+
+ spin_lock(&fc->bg_lock);
+ expired = fuse_request_expired(fc, &fc->bg_queue);
+ spin_unlock(&fc->bg_lock);
+ if (expired)
+ goto abort_conn;
+
+ spin_lock(&fc->lock);
+ if (!fc->connected) {
+ spin_unlock(&fc->lock);
+ return;
+ }
+ list_for_each_entry(fud, &fc->devices, entry) {
+ fpq = &fud->pq;
+ spin_lock(&fpq->lock);
+ if (fuse_request_expired(fc, &fpq->io) ||
+ fuse_fpq_processing_expired(fc, fpq->processing)) {
+ spin_unlock(&fpq->lock);
+ spin_unlock(&fc->lock);
+ goto abort_conn;
+ }
+
+ spin_unlock(&fpq->lock);
+ }
+ spin_unlock(&fc->lock);
+
+ if (fuse_uring_request_expired(fc))
+ goto abort_conn;
+
+out:
+ queue_delayed_work(system_percpu_wq, &fc->timeout.work,
+ fuse_timeout_timer_freq);
+ return;
+
+abort_conn:
+ fuse_abort_conn(fc);
+}
+
static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
{
INIT_LIST_HEAD(&req->list);
@@ -40,6 +134,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
refcount_set(&req->count, 1);
__set_bit(FR_PENDING, &req->flags);
req->fm = fm;
+ req->create_time = jiffies;
}
static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
@@ -111,8 +206,9 @@ static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
if (fuse_block_alloc(fc, for_background)) {
err = -EINTR;
- if (wait_event_killable_exclusive(fc->blocked_waitq,
- !fuse_block_alloc(fc, for_background)))
+ if (wait_event_state_exclusive(fc->blocked_waitq,
+ !fuse_block_alloc(fc, for_background),
+ (TASK_KILLABLE | TASK_FREEZABLE)))
goto out;
}
/* Matches smp_wmb() in fuse_set_initialized() */
@@ -226,6 +322,7 @@ unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}
+EXPORT_SYMBOL_GPL(fuse_req_hash);
/*
* A new request is available, wake fiq->waitq
@@ -273,12 +370,32 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
}
}
+static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq,
+ struct fuse_req *req)
+{
+ if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+ req->in.h.unique = fuse_get_unique_locked(fiq);
+
+ /* tracepoint captures in.h.unique and in.h.len */
+ trace_fuse_request_send(req);
+}
+
+inline void fuse_request_assign_unique(struct fuse_iqueue *fiq,
+ struct fuse_req *req)
+{
+ if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+ req->in.h.unique = fuse_get_unique(fiq);
+
+ /* tracepoint captures in.h.unique and in.h.len */
+ trace_fuse_request_send(req);
+}
+EXPORT_SYMBOL_GPL(fuse_request_assign_unique);
+
static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->lock);
if (fiq->connected) {
- if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
- req->in.h.unique = fuse_get_unique_locked(fiq);
+ fuse_request_assign_unique_locked(fiq, req);
list_add_tail(&req->list, &fiq->pending);
fuse_dev_wake_and_unlock(fiq);
} else {
@@ -301,7 +418,6 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
- trace_fuse_request_send(req);
fiq->ops->send_req(fiq, req);
}
@@ -407,6 +523,24 @@ static int queue_interrupt(struct fuse_req *req)
return 0;
}
+bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock)
+{
+ spin_lock(lock);
+ if (test_bit(FR_PENDING, &req->flags)) {
+ /*
+ * FR_PENDING does not get cleared as the request will end
+ * up in destruction anyway.
+ */
+ list_del(&req->list);
+ spin_unlock(lock);
+ __fuse_put_request(req);
+ req->out.h.error = -EINTR;
+ return true;
+ }
+ spin_unlock(lock);
+ return false;
+}
+
static void request_wait_answer(struct fuse_req *req)
{
struct fuse_conn *fc = req->fm->fc;
@@ -428,22 +562,25 @@ static void request_wait_answer(struct fuse_req *req)
}
if (!test_bit(FR_FORCE, &req->flags)) {
+ bool removed;
+
/* Only fatal signals may interrupt this */
err = wait_event_killable(req->waitq,
test_bit(FR_FINISHED, &req->flags));
if (!err)
return;
- spin_lock(&fiq->lock);
- /* Request is not yet in userspace, bail out */
- if (test_bit(FR_PENDING, &req->flags)) {
- list_del(&req->list);
- spin_unlock(&fiq->lock);
- __fuse_put_request(req);
- req->out.h.error = -EINTR;
+ if (req->args->abort_on_kill) {
+ fuse_abort_conn(fc);
return;
}
- spin_unlock(&fiq->lock);
+
+ if (test_bit(FR_URING, &req->flags))
+ removed = fuse_uring_remove_pending_req(req);
+ else
+ removed = fuse_remove_pending_req(req, &fiq->lock);
+ if (removed)
+ return;
}
/*
@@ -544,7 +681,8 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
fuse_force_creds(req);
__set_bit(FR_WAITING, &req->flags);
- __set_bit(FR_FORCE, &req->flags);
+ if (!args->abort_on_kill)
+ __set_bit(FR_FORCE, &req->flags);
} else {
WARN_ON(args->nocreds);
req = fuse_get_req(idmap, fm, false);
@@ -575,10 +713,10 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
{
struct fuse_iqueue *fiq = &fc->iq;
- req->in.h.unique = fuse_get_unique(fiq);
req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
+ fuse_request_assign_unique(fiq, req);
return fuse_uring_queue_bq_req(req);
}
@@ -705,7 +843,7 @@ static int unlock_request(struct fuse_req *req)
return err;
}
-void fuse_copy_init(struct fuse_copy_state *cs, int write,
+void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter)
{
memset(cs, 0, sizeof(*cs));
@@ -714,7 +852,7 @@ void fuse_copy_init(struct fuse_copy_state *cs, int write,
}
/* Unmap and put previous page of userspace buffer */
-static void fuse_copy_finish(struct fuse_copy_state *cs)
+void fuse_copy_finish(struct fuse_copy_state *cs)
{
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
@@ -823,7 +961,7 @@ static int fuse_check_folio(struct folio *folio)
{
if (folio_mapped(folio) ||
folio->mapping != NULL ||
- (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_lru |
@@ -844,10 +982,10 @@ static int fuse_check_folio(struct folio *folio)
* folio that was originally in @pagep will lose a reference and the new
* folio returned in @pagep will carry a reference.
*/
-static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
{
int err;
- struct folio *oldfolio = page_folio(*pagep);
+ struct folio *oldfolio = *foliop;
struct folio *newfolio;
struct pipe_buffer *buf = cs->pipebufs;
@@ -868,7 +1006,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
cs->pipebufs++;
cs->nr_segs--;
- if (cs->len != PAGE_SIZE)
+ if (cs->len != folio_size(oldfolio))
goto out_fallback;
if (!pipe_buf_try_steal(cs->pipe, buf))
@@ -879,6 +1017,9 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
folio_clear_uptodate(newfolio);
folio_clear_mappedtodisk(newfolio);
+ if (folio_test_large(newfolio))
+ goto out_fallback_unlock;
+
if (fuse_check_folio(newfolio) != 0)
goto out_fallback_unlock;
@@ -914,7 +1055,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (test_bit(FR_ABORTED, &cs->req->flags))
err = -ENOENT;
else
- *pagep = &newfolio->page;
+ *foliop = newfolio;
spin_unlock(&cs->req->waitq.lock);
if (err) {
@@ -947,8 +1088,8 @@ out_fallback:
goto out_put_old;
}
-static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
- unsigned offset, unsigned count)
+static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio,
+ unsigned offset, unsigned count)
{
struct pipe_buffer *buf;
int err;
@@ -956,17 +1097,17 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
- get_page(page);
+ folio_get(folio);
err = unlock_request(cs->req);
if (err) {
- put_page(page);
+ folio_put(folio);
return err;
}
fuse_copy_finish(cs);
buf = cs->pipebufs;
- buf->page = page;
+ buf->page = &folio->page;
buf->offset = offset;
buf->len = count;
@@ -978,20 +1119,24 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
}
/*
- * Copy a page in the request to/from the userspace buffer. Must be
+ * Copy a folio in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
- struct page *page = *pagep;
+ struct folio *folio = *foliop;
+ size_t size;
- if (page && zeroing && count < PAGE_SIZE)
- clear_highpage(page);
+ if (folio) {
+ size = folio_size(folio);
+ if (zeroing && count < size)
+ folio_zero_range(folio, 0, size);
+ }
while (count) {
- if (cs->write && cs->pipebufs && page) {
+ if (cs->write && cs->pipebufs && folio) {
/*
* Can't control lifetime of pipe buffers, so always
* copy user pages.
@@ -1001,12 +1146,12 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
if (err)
return err;
} else {
- return fuse_ref_page(cs, page, offset, count);
+ return fuse_ref_folio(cs, folio, offset, count);
}
} else if (!cs->len) {
- if (cs->move_pages && page &&
- offset == 0 && count == PAGE_SIZE) {
- err = fuse_try_move_page(cs, pagep);
+ if (cs->move_folios && folio &&
+ offset == 0 && count == size) {
+ err = fuse_try_move_folio(cs, foliop);
if (err <= 0)
return err;
} else {
@@ -1015,22 +1160,30 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
return err;
}
}
- if (page) {
- void *mapaddr = kmap_local_page(page);
- void *buf = mapaddr + offset;
- offset += fuse_copy_do(cs, &buf, &count);
+ if (folio) {
+ void *mapaddr = kmap_local_folio(folio, offset);
+ void *buf = mapaddr;
+ unsigned int copy = count;
+ unsigned int bytes_copied;
+
+ if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset))
+ copy = PAGE_SIZE - offset_in_page(offset);
+
+ bytes_copied = fuse_copy_do(cs, &buf, &copy);
kunmap_local(mapaddr);
+ offset += bytes_copied;
+ count -= bytes_copied;
} else
offset += fuse_copy_do(cs, NULL, &count);
}
- if (page && !cs->write)
- flush_dcache_page(page);
+ if (folio && !cs->write)
+ flush_dcache_folio(folio);
return 0;
}
-/* Copy pages in the request to/from userspace buffer */
-static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
- int zeroing)
+/* Copy folios in the request to/from userspace buffer */
+static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
{
unsigned i;
struct fuse_req *req = cs->req;
@@ -1040,23 +1193,12 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
- struct page *orig, *pagep;
- orig = pagep = &ap->folios[i]->page;
-
- err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
+ err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing);
if (err)
return err;
nbytes -= count;
-
- /*
- * fuse_copy_page may have moved a page from a pipe instead of
- * copying into our given page, so update the folios if it was
- * replaced.
- */
- if (pagep != orig)
- ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1086,7 +1228,7 @@ int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
for (i = 0; !err && i < numargs; i++) {
struct fuse_arg *arg = &args[i];
if (i == numargs - 1 && argpages)
- err = fuse_copy_pages(cs, arg->size, zeroing);
+ err = fuse_copy_folios(cs, arg->size, zeroing);
else
err = fuse_copy_one(cs, arg->value, arg->size);
}
@@ -1406,28 +1548,40 @@ out_end:
static int fuse_dev_open(struct inode *inode, struct file *file)
{
- /*
- * The fuse device's file's private_data is used to hold
- * the fuse_conn(ection) when it is mounted, and is used to
- * keep track of whether the file has been mounted already.
- */
- file->private_data = NULL;
+ struct fuse_dev *fud = fuse_dev_alloc();
+
+ if (!fud)
+ return -ENOMEM;
+
+ file->private_data = fud;
return 0;
}
+struct fuse_dev *fuse_get_dev(struct file *file)
+{
+ struct fuse_dev *fud = fuse_file_to_fud(file);
+ int err;
+
+ err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_fc_get(fud) != NULL);
+ if (err)
+ return ERR_PTR(err);
+
+ return fud;
+}
+
static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
{
struct fuse_copy_state cs;
struct file *file = iocb->ki_filp;
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!user_backed_iter(to))
return -EINVAL;
- fuse_copy_init(&cs, 1, to);
+ fuse_copy_init(&cs, true, to);
return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
}
@@ -1442,15 +1596,14 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
- bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
- GFP_KERNEL);
+ bufs = kvmalloc_objs(struct pipe_buffer, pipe->max_usage);
if (!bufs)
return -ENOMEM;
- fuse_copy_init(&cs, 1, NULL);
+ fuse_copy_init(&cs, true, NULL);
cs.pipebufs = bufs;
cs.pipe = pipe;
ret = fuse_dev_do_read(fud, in, &cs, len);
@@ -1487,35 +1640,31 @@ static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_poll_wakeup_out outarg;
- int err = -EINVAL;
+ int err;
if (size != sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
fuse_copy_finish(cs);
return fuse_notify_poll_wakeup(fc, &outarg);
-
-err:
- fuse_copy_finish(cs);
- return err;
}
static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_inval_inode_out outarg;
- int err = -EINVAL;
+ int err;
if (size != sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
fuse_copy_finish(cs);
down_read(&fc->killsb);
@@ -1523,39 +1672,33 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
outarg.off, outarg.len);
up_read(&fc->killsb);
return err;
-
-err:
- fuse_copy_finish(cs);
- return err;
}
static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_inval_entry_out outarg;
- int err = -ENOMEM;
+ int err;
char *buf;
struct qstr name;
- buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
- if (!buf)
- goto err;
-
- err = -EINVAL;
if (size < sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
- err = -ENAMETOOLONG;
- if (outarg.namelen > FUSE_NAME_MAX)
- goto err;
+ if (outarg.namelen > fc->name_max)
+ return -ENAMETOOLONG;
err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
- goto err;
+ return -EINVAL;
+
+ buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
name.name = buf;
name.len = outarg.namelen;
@@ -1568,12 +1711,8 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
up_read(&fc->killsb);
- kfree(buf);
- return err;
-
err:
kfree(buf);
- fuse_copy_finish(cs);
return err;
}
@@ -1581,29 +1720,26 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_delete_out outarg;
- int err = -ENOMEM;
+ int err;
char *buf;
struct qstr name;
- buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
- if (!buf)
- goto err;
-
- err = -EINVAL;
if (size < sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
- err = -ENAMETOOLONG;
- if (outarg.namelen > FUSE_NAME_MAX)
- goto err;
+ if (outarg.namelen > fc->name_max)
+ return -ENAMETOOLONG;
- err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
- goto err;
+ return -EINVAL;
+
+ buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
name.name = buf;
name.len = outarg.namelen;
@@ -1616,12 +1752,8 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
up_read(&fc->killsb);
- kfree(buf);
- return err;
-
err:
kfree(buf);
- fuse_copy_finish(cs);
return err;
}
@@ -1633,25 +1765,27 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
struct address_space *mapping;
u64 nodeid;
int err;
- pgoff_t index;
- unsigned int offset;
unsigned int num;
loff_t file_size;
+ loff_t pos;
loff_t end;
- err = -EINVAL;
if (size < sizeof(outarg))
- goto out_finish;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto out_finish;
+ return err;
- err = -EINVAL;
if (size - sizeof(outarg) != outarg.size)
- goto out_finish;
+ return -EINVAL;
+
+ if (outarg.offset >= MAX_LFS_FILESIZE)
+ return -EINVAL;
nodeid = outarg.nodeid;
+ pos = outarg.offset;
+ num = min(outarg.size, MAX_LFS_FILESIZE - pos);
down_read(&fc->killsb);
@@ -1661,32 +1795,31 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
goto out_up_killsb;
mapping = inode->i_mapping;
- index = outarg.offset >> PAGE_SHIFT;
- offset = outarg.offset & ~PAGE_MASK;
file_size = i_size_read(inode);
- end = outarg.offset + outarg.size;
+ end = pos + num;
if (end > file_size) {
file_size = end;
- fuse_write_update_attr(inode, file_size, outarg.size);
+ fuse_write_update_attr(inode, file_size, num);
}
- num = outarg.size;
while (num) {
struct folio *folio;
- struct page *page;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ pgoff_t index = pos >> PAGE_SHIFT;
folio = filemap_grab_folio(mapping, index);
err = PTR_ERR(folio);
if (IS_ERR(folio))
goto out_iput;
- page = &folio->page;
- this_num = min_t(unsigned, num, folio_size(folio) - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!folio_test_uptodate(folio) && !err && offset == 0 &&
- (this_num == folio_size(folio) || file_size == end)) {
- folio_zero_segment(folio, this_num, folio_size(folio));
+ folio_offset = offset_in_folio(folio, pos);
+ nr_bytes = min(num, folio_size(folio) - folio_offset);
+
+ err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0);
+ if (!folio_test_uptodate(folio) && !err && folio_offset == 0 &&
+ (nr_bytes == folio_size(folio) || file_size == end)) {
+ folio_zero_segment(folio, nr_bytes, folio_size(folio));
folio_mark_uptodate(folio);
}
folio_unlock(folio);
@@ -1695,9 +1828,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (err)
goto out_iput;
- num -= this_num;
- offset = 0;
- index++;
+ pos += nr_bytes;
+ num -= nr_bytes;
}
err = 0;
@@ -1706,8 +1838,6 @@ out_iput:
iput(inode);
out_up_killsb:
up_read(&fc->killsb);
-out_finish:
- fuse_copy_finish(cs);
return err;
}
@@ -1731,29 +1861,30 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
{
int err;
struct address_space *mapping = inode->i_mapping;
- pgoff_t index;
loff_t file_size;
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- unsigned int num_pages, cur_pages = 0;
+ unsigned int num_pages;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
struct fuse_args_pages *ap;
struct fuse_args *args;
+ loff_t pos = outarg->offset;
- offset = outarg->offset & ~PAGE_MASK;
+ offset = offset_in_page(pos);
file_size = i_size_read(inode);
num = min(outarg->size, fc->max_write);
- if (outarg->offset > file_size)
+ if (pos > file_size)
num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
+ else if (num > file_size - pos)
+ num = file_size - pos;
- num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE);
num_pages = min(num_pages, fc->max_pages);
+ num = min(num, num_pages << PAGE_SHIFT);
args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
@@ -1772,27 +1903,27 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
args->in_pages = true;
args->end = fuse_retrieve_end;
- index = outarg->offset >> PAGE_SHIFT;
-
- while (num && cur_pages < num_pages) {
+ while (num && ap->num_folios < num_pages) {
struct folio *folio;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ pgoff_t index = pos >> PAGE_SHIFT;
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio))
break;
- this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ folio_offset = offset_in_folio(folio, pos);
+ nr_bytes = min(folio_size(folio) - folio_offset, num);
+
ap->folios[ap->num_folios] = folio;
- ap->descs[ap->num_folios].offset = offset;
- ap->descs[ap->num_folios].length = this_num;
+ ap->descs[ap->num_folios].offset = folio_offset;
+ ap->descs[ap->num_folios].length = nr_bytes;
ap->num_folios++;
- cur_pages++;
- offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
+ pos += nr_bytes;
+ num -= nr_bytes;
+ total_len += nr_bytes;
}
ra->inarg.offset = outarg->offset;
ra->inarg.size = total_len;
@@ -1817,16 +1948,18 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
u64 nodeid;
int err;
- err = -EINVAL;
if (size != sizeof(outarg))
- goto copy_finish;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto copy_finish;
+ return err;
fuse_copy_finish(cs);
+ if (outarg.offset >= MAX_LFS_FILESIZE)
+ return -EINVAL;
+
down_read(&fc->killsb);
err = -ENOENT;
nodeid = outarg.nodeid;
@@ -1839,10 +1972,6 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
up_read(&fc->killsb);
return err;
-
-copy_finish:
- fuse_copy_finish(cs);
- return err;
}
/*
@@ -1908,11 +2037,68 @@ static int fuse_notify_resend(struct fuse_conn *fc)
return 0;
}
+/*
+ * Increments the fuse connection epoch. This will result of dentries from
+ * previous epochs to be invalidated. Additionally, if inval_wq is set, a work
+ * queue is scheduled to trigger the invalidation.
+ */
+static int fuse_notify_inc_epoch(struct fuse_conn *fc)
+{
+ atomic_inc(&fc->epoch);
+ if (inval_wq)
+ schedule_work(&fc->epoch_work);
+
+ return 0;
+}
+
+static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_prune_out outarg;
+ const unsigned int batch = 512;
+ u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL);
+ unsigned int num, i;
+ int err;
+
+ if (!nodeids)
+ return -ENOMEM;
+
+ if (size < sizeof(outarg))
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ return err;
+
+ if (size - sizeof(outarg) != outarg.count * sizeof(u64))
+ return -EINVAL;
+
+ for (; outarg.count; outarg.count -= num) {
+ num = min(batch, outarg.count);
+ err = fuse_copy_one(cs, nodeids, num * sizeof(u64));
+ if (err)
+ return err;
+
+ scoped_guard(rwsem_read, &fc->killsb) {
+ for (i = 0; i < num; i++)
+ fuse_try_prune_one_inode(fc, nodeids[i]);
+ }
+ }
+ return 0;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
- /* Don't try to move pages (yet) */
- cs->move_pages = 0;
+ /*
+ * Only allow notifications during while the connection is in an
+ * initialized and connected state
+ */
+ if (!fc->initialized || !fc->connected)
+ return -EINVAL;
+
+ /* Don't try to move folios (yet) */
+ cs->move_folios = false;
switch (code) {
case FUSE_NOTIFY_POLL:
@@ -1936,8 +2122,13 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_RESEND:
return fuse_notify_resend(fc);
+ case FUSE_NOTIFY_INC_EPOCH:
+ return fuse_notify_inc_epoch(fc);
+
+ case FUSE_NOTIFY_PRUNE:
+ return fuse_notify_prune(fc, size, cs);
+
default:
- fuse_copy_finish(cs);
return -EINVAL;
}
}
@@ -2017,7 +2208,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
*/
if (!oh.unique) {
err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
- goto out;
+ goto copy_finish;
}
err = -EINVAL;
@@ -2060,7 +2251,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
spin_unlock(&fpq->lock);
cs->req = req;
if (!req->args->page_replace)
- cs->move_pages = 0;
+ cs->move_folios = false;
if (oh.error)
err = nbytes != sizeof(oh) ? -EINVAL : 0;
@@ -2090,7 +2281,7 @@ copy_finish:
static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
{
struct fuse_copy_state cs;
- struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
+ struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp);
if (!fud)
return -EPERM;
@@ -2098,7 +2289,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
if (!user_backed_iter(from))
return -EINVAL;
- fuse_copy_init(&cs, 0, from);
+ fuse_copy_init(&cs, false, from);
return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
}
@@ -2112,11 +2303,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
unsigned idx;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
- struct fuse_dev *fud;
+ struct fuse_dev *fud = __fuse_get_dev(out);
size_t rem;
ssize_t ret;
- fud = fuse_get_dev(out);
if (!fud)
return -EPERM;
@@ -2126,7 +2316,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
tail = pipe->tail;
count = pipe_occupancy(head, tail);
- bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
+ bufs = kvmalloc_objs(struct pipe_buffer, count);
if (!bufs) {
pipe_unlock(pipe);
return -ENOMEM;
@@ -2172,13 +2362,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
}
pipe_unlock(pipe);
- fuse_copy_init(&cs, 0, NULL);
+ fuse_copy_init(&cs, false, NULL);
cs.pipebufs = bufs;
cs.nr_segs = nbuf;
cs.pipe = pipe;
if (flags & SPLICE_F_MOVE)
- cs.move_pages = 1;
+ cs.move_folios = true;
ret = fuse_dev_do_write(fud, &cs, len);
@@ -2202,7 +2392,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
struct fuse_iqueue *fiq;
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
+ if (IS_ERR(fud))
return EPOLLERR;
fiq = &fud->fc->iq;
@@ -2255,7 +2445,7 @@ static void end_polls(struct fuse_conn *fc)
* The same effect is usually achievable through killing the filesystem daemon
* and all users of the filesystem. The exception is the combination of an
* asynchronous request and the tricky deadlock (see
- * Documentation/filesystems/fuse.rst).
+ * Documentation/filesystems/fuse/fuse.rst).
*
* Aborting requests under I/O goes as follows: 1: Separate out unlocked
* requests, they should be finished off immediately. Locked requests will be
@@ -2275,6 +2465,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
LIST_HEAD(to_end);
unsigned int i;
+ if (fc->timeout.req_timeout)
+ cancel_delayed_work(&fc->timeout.work);
+
/* Background queuing checks fc->connected under bg_lock */
spin_lock(&fc->bg_lock);
fc->connected = 0;
@@ -2346,13 +2539,15 @@ void fuse_wait_aborted(struct fuse_conn *fc)
int fuse_dev_release(struct inode *inode, struct file *file)
{
- struct fuse_dev *fud = fuse_get_dev(file);
+ struct fuse_dev *fud = fuse_file_to_fud(file);
+ /* Pairs with cmpxchg() in fuse_dev_install() */
+ struct fuse_conn *fc = xchg(&fud->fc, FUSE_DEV_FC_DISCONNECTED);
- if (fud) {
- struct fuse_conn *fc = fud->fc;
+ if (fc) {
struct fuse_pqueue *fpq = &fud->pq;
LIST_HEAD(to_end);
unsigned int i;
+ bool last;
spin_lock(&fpq->lock);
WARN_ON(!list_empty(&fpq->io));
@@ -2362,13 +2557,19 @@ int fuse_dev_release(struct inode *inode, struct file *file)
fuse_dev_end_requests(&to_end);
+ spin_lock(&fc->lock);
+ list_del(&fud->entry);
/* Are we the last open device? */
- if (atomic_dec_and_test(&fc->dev_count)) {
+ last = list_empty(&fc->devices);
+ spin_unlock(&fc->lock);
+
+ if (last) {
WARN_ON(fc->iq.fasync != NULL);
fuse_abort_conn(fc);
}
- fuse_dev_free(fud);
+ fuse_conn_put(fc);
}
+ fuse_dev_put(fud);
return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);
@@ -2377,35 +2578,17 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
{
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
/* No locking - fasync_helper does its own locking */
return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
}
-static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
-{
- struct fuse_dev *fud;
-
- if (new->private_data)
- return -EINVAL;
-
- fud = fuse_dev_alloc_install(fc);
- if (!fud)
- return -ENOMEM;
-
- new->private_data = fud;
- atomic_inc(&fc->dev_count);
-
- return 0;
-}
-
static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
{
- int res;
int oldfd;
- struct fuse_dev *fud = NULL;
+ struct fuse_dev *fud, *new_fud;
if (get_user(oldfd, argp))
return -EFAULT;
@@ -2418,17 +2601,20 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
* Check against file->f_op because CUSE
* uses the same ioctl handler.
*/
- if (fd_file(f)->f_op == file->f_op)
- fud = fuse_get_dev(fd_file(f));
+ if (fd_file(f)->f_op != file->f_op)
+ return -EINVAL;
- res = -EINVAL;
- if (fud) {
- mutex_lock(&fuse_mutex);
- res = fuse_device_clone(fud->fc, file);
- mutex_unlock(&fuse_mutex);
- }
+ fud = fuse_get_dev(fd_file(f));
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
- return res;
+ new_fud = fuse_file_to_fud(file);
+ if (fuse_dev_fc_get(new_fud))
+ return -EINVAL;
+
+ fuse_dev_install(new_fud, fud->fc);
+
+ return 0;
}
static long fuse_dev_ioctl_backing_open(struct file *file,
@@ -2437,8 +2623,8 @@ static long fuse_dev_ioctl_backing_open(struct file *file,
struct fuse_dev *fud = fuse_get_dev(file);
struct fuse_backing_map map;
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
return -EOPNOTSUPP;
@@ -2454,8 +2640,8 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
struct fuse_dev *fud = fuse_get_dev(file);
int backing_id;
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
return -EOPNOTSUPP;
@@ -2466,6 +2652,20 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
return fuse_backing_close(fud->fc, backing_id);
}
+static long fuse_dev_ioctl_sync_init(struct file *file)
+{
+ int err = -EINVAL;
+ struct fuse_dev *fud = fuse_file_to_fud(file);
+
+ mutex_lock(&fuse_mutex);
+ if (!fuse_dev_fc_get(fud)) {
+ fud->sync_init = true;
+ err = 0;
+ }
+ mutex_unlock(&fuse_mutex);
+ return err;
+}
+
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -2481,11 +2681,25 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
case FUSE_DEV_IOC_BACKING_CLOSE:
return fuse_dev_ioctl_backing_close(file, argp);
+ case FUSE_DEV_IOC_SYNC_INIT:
+ return fuse_dev_ioctl_sync_init(file);
+
default:
return -ENOTTY;
}
}
+#ifdef CONFIG_PROC_FS
+static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
+{
+ struct fuse_dev *fud = __fuse_get_dev(file);
+ if (!fud)
+ return;
+
+ seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev);
+}
+#endif
+
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.open = fuse_dev_open,
@@ -2501,6 +2715,9 @@ const struct file_operations fuse_dev_operations = {
#ifdef CONFIG_FUSE_IO_URING
.uring_cmd = fuse_uring_cmd,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = fuse_dev_show_fdinfo,
+#endif
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);