diff options
Diffstat (limited to 'fs/fuse/dev.c')
| -rw-r--r-- | fs/fuse/dev.c | 669 |
1 files changed, 443 insertions, 226 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 51e31df4c546..5dda7080f4a9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -23,8 +23,8 @@ #include <linux/swap.h> #include <linux/splice.h> #include <linux/sched.h> +#include <linux/seq_file.h> -#define CREATE_TRACE_POINTS #include "fuse_trace.h" MODULE_ALIAS_MISCDEV(FUSE_MINOR); @@ -32,6 +32,100 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; +const unsigned long fuse_timeout_timer_freq = + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); + +bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list) +{ + struct fuse_req *req; + + req = list_first_entry_or_null(list, struct fuse_req, list); + if (!req) + return false; + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); +} + +static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing) +{ + int i; + + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) + if (fuse_request_expired(fc, &processing[i])) + return true; + + return false; +} + +/* + * Check if any requests aren't being completed by the time the request timeout + * elapses. To do so, we: + * - check the fiq pending list + * - check the bg queue + * - check the fpq io and processing lists + * + * To make this fast, we only check against the head request on each list since + * these are generally queued in order of creation time (eg newer requests get + * queued to the tail). We might miss a few edge cases (eg requests transitioning + * between lists, re-sent requests at the head of the pending list having a + * later creation time than other requests on that list, etc.) but that is fine + * since if the request never gets fulfilled, it will eventually be caught. + */ +void fuse_check_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, + timeout.work); + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_dev *fud; + struct fuse_pqueue *fpq; + bool expired = false; + + if (!atomic_read(&fc->num_waiting)) + goto out; + + spin_lock(&fiq->lock); + expired = fuse_request_expired(fc, &fiq->pending); + spin_unlock(&fiq->lock); + if (expired) + goto abort_conn; + + spin_lock(&fc->bg_lock); + expired = fuse_request_expired(fc, &fc->bg_queue); + spin_unlock(&fc->bg_lock); + if (expired) + goto abort_conn; + + spin_lock(&fc->lock); + if (!fc->connected) { + spin_unlock(&fc->lock); + return; + } + list_for_each_entry(fud, &fc->devices, entry) { + fpq = &fud->pq; + spin_lock(&fpq->lock); + if (fuse_request_expired(fc, &fpq->io) || + fuse_fpq_processing_expired(fc, fpq->processing)) { + spin_unlock(&fpq->lock); + spin_unlock(&fc->lock); + goto abort_conn; + } + + spin_unlock(&fpq->lock); + } + spin_unlock(&fc->lock); + + if (fuse_uring_request_expired(fc)) + goto abort_conn; + +out: + queue_delayed_work(system_percpu_wq, &fc->timeout.work, + fuse_timeout_timer_freq); + return; + +abort_conn: + fuse_abort_conn(fc); +} + static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) { INIT_LIST_HEAD(&req->list); @@ -40,6 +134,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) refcount_set(&req->count, 1); __set_bit(FR_PENDING, &req->flags); req->fm = fm; + req->create_time = jiffies; } static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) @@ -111,8 +206,9 @@ static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap, if (fuse_block_alloc(fc, for_background)) { err = -EINTR; - if (wait_event_killable_exclusive(fc->blocked_waitq, - !fuse_block_alloc(fc, for_background))) + if (wait_event_state_exclusive(fc->blocked_waitq, + !fuse_block_alloc(fc, for_background), + (TASK_KILLABLE | TASK_FREEZABLE))) goto out; } /* Matches smp_wmb() in fuse_set_initialized() */ @@ -226,6 +322,7 @@ unsigned int fuse_req_hash(u64 unique) { return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); } +EXPORT_SYMBOL_GPL(fuse_req_hash); /* * A new request is available, wake fiq->waitq @@ -273,12 +370,32 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) } } +static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq, + struct fuse_req *req) +{ + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) + req->in.h.unique = fuse_get_unique_locked(fiq); + + /* tracepoint captures in.h.unique and in.h.len */ + trace_fuse_request_send(req); +} + +inline void fuse_request_assign_unique(struct fuse_iqueue *fiq, + struct fuse_req *req) +{ + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) + req->in.h.unique = fuse_get_unique(fiq); + + /* tracepoint captures in.h.unique and in.h.len */ + trace_fuse_request_send(req); +} +EXPORT_SYMBOL_GPL(fuse_request_assign_unique); + static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->lock); if (fiq->connected) { - if (req->in.h.opcode != FUSE_NOTIFY_REPLY) - req->in.h.unique = fuse_get_unique_locked(fiq); + fuse_request_assign_unique_locked(fiq, req); list_add_tail(&req->list, &fiq->pending); fuse_dev_wake_and_unlock(fiq); } else { @@ -301,7 +418,6 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); - trace_fuse_request_send(req); fiq->ops->send_req(fiq, req); } @@ -407,6 +523,24 @@ static int queue_interrupt(struct fuse_req *req) return 0; } +bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock) +{ + spin_lock(lock); + if (test_bit(FR_PENDING, &req->flags)) { + /* + * FR_PENDING does not get cleared as the request will end + * up in destruction anyway. + */ + list_del(&req->list); + spin_unlock(lock); + __fuse_put_request(req); + req->out.h.error = -EINTR; + return true; + } + spin_unlock(lock); + return false; +} + static void request_wait_answer(struct fuse_req *req) { struct fuse_conn *fc = req->fm->fc; @@ -428,22 +562,25 @@ static void request_wait_answer(struct fuse_req *req) } if (!test_bit(FR_FORCE, &req->flags)) { + bool removed; + /* Only fatal signals may interrupt this */ err = wait_event_killable(req->waitq, test_bit(FR_FINISHED, &req->flags)); if (!err) return; - spin_lock(&fiq->lock); - /* Request is not yet in userspace, bail out */ - if (test_bit(FR_PENDING, &req->flags)) { - list_del(&req->list); - spin_unlock(&fiq->lock); - __fuse_put_request(req); - req->out.h.error = -EINTR; + if (req->args->abort_on_kill) { + fuse_abort_conn(fc); return; } - spin_unlock(&fiq->lock); + + if (test_bit(FR_URING, &req->flags)) + removed = fuse_uring_remove_pending_req(req); + else + removed = fuse_remove_pending_req(req, &fiq->lock); + if (removed) + return; } /* @@ -544,7 +681,8 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, fuse_force_creds(req); __set_bit(FR_WAITING, &req->flags); - __set_bit(FR_FORCE, &req->flags); + if (!args->abort_on_kill) + __set_bit(FR_FORCE, &req->flags); } else { WARN_ON(args->nocreds); req = fuse_get_req(idmap, fm, false); @@ -575,10 +713,10 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc, { struct fuse_iqueue *fiq = &fc->iq; - req->in.h.unique = fuse_get_unique(fiq); req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); + fuse_request_assign_unique(fiq, req); return fuse_uring_queue_bq_req(req); } @@ -705,7 +843,7 @@ static int unlock_request(struct fuse_req *req) return err; } -void fuse_copy_init(struct fuse_copy_state *cs, int write, +void fuse_copy_init(struct fuse_copy_state *cs, bool write, struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); @@ -714,7 +852,7 @@ void fuse_copy_init(struct fuse_copy_state *cs, int write, } /* Unmap and put previous page of userspace buffer */ -static void fuse_copy_finish(struct fuse_copy_state *cs) +void fuse_copy_finish(struct fuse_copy_state *cs) { if (cs->currbuf) { struct pipe_buffer *buf = cs->currbuf; @@ -823,7 +961,7 @@ static int fuse_check_folio(struct folio *folio) { if (folio_mapped(folio) || folio->mapping != NULL || - (folio->flags & PAGE_FLAGS_CHECK_AT_PREP & + (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP & ~(1 << PG_locked | 1 << PG_referenced | 1 << PG_lru | @@ -844,10 +982,10 @@ static int fuse_check_folio(struct folio *folio) * folio that was originally in @pagep will lose a reference and the new * folio returned in @pagep will carry a reference. */ -static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) +static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop) { int err; - struct folio *oldfolio = page_folio(*pagep); + struct folio *oldfolio = *foliop; struct folio *newfolio; struct pipe_buffer *buf = cs->pipebufs; @@ -868,7 +1006,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) cs->pipebufs++; cs->nr_segs--; - if (cs->len != PAGE_SIZE) + if (cs->len != folio_size(oldfolio)) goto out_fallback; if (!pipe_buf_try_steal(cs->pipe, buf)) @@ -879,6 +1017,9 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) folio_clear_uptodate(newfolio); folio_clear_mappedtodisk(newfolio); + if (folio_test_large(newfolio)) + goto out_fallback_unlock; + if (fuse_check_folio(newfolio) != 0) goto out_fallback_unlock; @@ -914,7 +1055,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else - *pagep = &newfolio->page; + *foliop = newfolio; spin_unlock(&cs->req->waitq.lock); if (err) { @@ -947,8 +1088,8 @@ out_fallback: goto out_put_old; } -static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, - unsigned offset, unsigned count) +static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio, + unsigned offset, unsigned count) { struct pipe_buffer *buf; int err; @@ -956,17 +1097,17 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, if (cs->nr_segs >= cs->pipe->max_usage) return -EIO; - get_page(page); + folio_get(folio); err = unlock_request(cs->req); if (err) { - put_page(page); + folio_put(folio); return err; } fuse_copy_finish(cs); buf = cs->pipebufs; - buf->page = page; + buf->page = &folio->page; buf->offset = offset; buf->len = count; @@ -978,20 +1119,24 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, } /* - * Copy a page in the request to/from the userspace buffer. Must be + * Copy a folio in the request to/from the userspace buffer. Must be * done atomically */ -static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, - unsigned offset, unsigned count, int zeroing) +static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, + unsigned offset, unsigned count, int zeroing) { int err; - struct page *page = *pagep; + struct folio *folio = *foliop; + size_t size; - if (page && zeroing && count < PAGE_SIZE) - clear_highpage(page); + if (folio) { + size = folio_size(folio); + if (zeroing && count < size) + folio_zero_range(folio, 0, size); + } while (count) { - if (cs->write && cs->pipebufs && page) { + if (cs->write && cs->pipebufs && folio) { /* * Can't control lifetime of pipe buffers, so always * copy user pages. @@ -1001,12 +1146,12 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, if (err) return err; } else { - return fuse_ref_page(cs, page, offset, count); + return fuse_ref_folio(cs, folio, offset, count); } } else if (!cs->len) { - if (cs->move_pages && page && - offset == 0 && count == PAGE_SIZE) { - err = fuse_try_move_page(cs, pagep); + if (cs->move_folios && folio && + offset == 0 && count == size) { + err = fuse_try_move_folio(cs, foliop); if (err <= 0) return err; } else { @@ -1015,22 +1160,30 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, return err; } } - if (page) { - void *mapaddr = kmap_local_page(page); - void *buf = mapaddr + offset; - offset += fuse_copy_do(cs, &buf, &count); + if (folio) { + void *mapaddr = kmap_local_folio(folio, offset); + void *buf = mapaddr; + unsigned int copy = count; + unsigned int bytes_copied; + + if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset)) + copy = PAGE_SIZE - offset_in_page(offset); + + bytes_copied = fuse_copy_do(cs, &buf, ©); kunmap_local(mapaddr); + offset += bytes_copied; + count -= bytes_copied; } else offset += fuse_copy_do(cs, NULL, &count); } - if (page && !cs->write) - flush_dcache_page(page); + if (folio && !cs->write) + flush_dcache_folio(folio); return 0; } -/* Copy pages in the request to/from userspace buffer */ -static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, - int zeroing) +/* Copy folios in the request to/from userspace buffer */ +static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes, + int zeroing) { unsigned i; struct fuse_req *req = cs->req; @@ -1040,23 +1193,12 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, int err; unsigned int offset = ap->descs[i].offset; unsigned int count = min(nbytes, ap->descs[i].length); - struct page *orig, *pagep; - orig = pagep = &ap->folios[i]->page; - - err = fuse_copy_page(cs, &pagep, offset, count, zeroing); + err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing); if (err) return err; nbytes -= count; - - /* - * fuse_copy_page may have moved a page from a pipe instead of - * copying into our given page, so update the folios if it was - * replaced. - */ - if (pagep != orig) - ap->folios[i] = page_folio(pagep); } return 0; } @@ -1086,7 +1228,7 @@ int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, for (i = 0; !err && i < numargs; i++) { struct fuse_arg *arg = &args[i]; if (i == numargs - 1 && argpages) - err = fuse_copy_pages(cs, arg->size, zeroing); + err = fuse_copy_folios(cs, arg->size, zeroing); else err = fuse_copy_one(cs, arg->value, arg->size); } @@ -1406,28 +1548,40 @@ out_end: static int fuse_dev_open(struct inode *inode, struct file *file) { - /* - * The fuse device's file's private_data is used to hold - * the fuse_conn(ection) when it is mounted, and is used to - * keep track of whether the file has been mounted already. - */ - file->private_data = NULL; + struct fuse_dev *fud = fuse_dev_alloc(); + + if (!fud) + return -ENOMEM; + + file->private_data = fud; return 0; } +struct fuse_dev *fuse_get_dev(struct file *file) +{ + struct fuse_dev *fud = fuse_file_to_fud(file); + int err; + + err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_fc_get(fud) != NULL); + if (err) + return ERR_PTR(err); + + return fud; +} + static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!user_backed_iter(to)) return -EINVAL; - fuse_copy_init(&cs, 1, to); + fuse_copy_init(&cs, true, to); return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to)); } @@ -1442,15 +1596,14 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, struct fuse_copy_state cs; struct fuse_dev *fud = fuse_get_dev(in); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); - bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer), - GFP_KERNEL); + bufs = kvmalloc_objs(struct pipe_buffer, pipe->max_usage); if (!bufs) return -ENOMEM; - fuse_copy_init(&cs, 1, NULL); + fuse_copy_init(&cs, true, NULL); cs.pipebufs = bufs; cs.pipe = pipe; ret = fuse_dev_do_read(fud, in, &cs, len); @@ -1487,35 +1640,31 @@ static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_poll_wakeup_out outarg; - int err = -EINVAL; + int err; if (size != sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; fuse_copy_finish(cs); return fuse_notify_poll_wakeup(fc, &outarg); - -err: - fuse_copy_finish(cs); - return err; } static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_inval_inode_out outarg; - int err = -EINVAL; + int err; if (size != sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; fuse_copy_finish(cs); down_read(&fc->killsb); @@ -1523,39 +1672,33 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, outarg.off, outarg.len); up_read(&fc->killsb); return err; - -err: - fuse_copy_finish(cs); - return err; } static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_inval_entry_out outarg; - int err = -ENOMEM; + int err; char *buf; struct qstr name; - buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); - if (!buf) - goto err; - - err = -EINVAL; if (size < sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; - err = -ENAMETOOLONG; - if (outarg.namelen > FUSE_NAME_MAX) - goto err; + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; err = -EINVAL; if (size != sizeof(outarg) + outarg.namelen + 1) - goto err; + return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; name.name = buf; name.len = outarg.namelen; @@ -1568,12 +1711,8 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, down_read(&fc->killsb); err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); up_read(&fc->killsb); - kfree(buf); - return err; - err: kfree(buf); - fuse_copy_finish(cs); return err; } @@ -1581,29 +1720,26 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_delete_out outarg; - int err = -ENOMEM; + int err; char *buf; struct qstr name; - buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); - if (!buf) - goto err; - - err = -EINVAL; if (size < sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; - err = -ENAMETOOLONG; - if (outarg.namelen > FUSE_NAME_MAX) - goto err; + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; - err = -EINVAL; if (size != sizeof(outarg) + outarg.namelen + 1) - goto err; + return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; name.name = buf; name.len = outarg.namelen; @@ -1616,12 +1752,8 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, down_read(&fc->killsb); err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); up_read(&fc->killsb); - kfree(buf); - return err; - err: kfree(buf); - fuse_copy_finish(cs); return err; } @@ -1633,25 +1765,27 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, struct address_space *mapping; u64 nodeid; int err; - pgoff_t index; - unsigned int offset; unsigned int num; loff_t file_size; + loff_t pos; loff_t end; - err = -EINVAL; if (size < sizeof(outarg)) - goto out_finish; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto out_finish; + return err; - err = -EINVAL; if (size - sizeof(outarg) != outarg.size) - goto out_finish; + return -EINVAL; + + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; nodeid = outarg.nodeid; + pos = outarg.offset; + num = min(outarg.size, MAX_LFS_FILESIZE - pos); down_read(&fc->killsb); @@ -1661,32 +1795,31 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, goto out_up_killsb; mapping = inode->i_mapping; - index = outarg.offset >> PAGE_SHIFT; - offset = outarg.offset & ~PAGE_MASK; file_size = i_size_read(inode); - end = outarg.offset + outarg.size; + end = pos + num; if (end > file_size) { file_size = end; - fuse_write_update_attr(inode, file_size, outarg.size); + fuse_write_update_attr(inode, file_size, num); } - num = outarg.size; while (num) { struct folio *folio; - struct page *page; - unsigned int this_num; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; folio = filemap_grab_folio(mapping, index); err = PTR_ERR(folio); if (IS_ERR(folio)) goto out_iput; - page = &folio->page; - this_num = min_t(unsigned, num, folio_size(folio) - offset); - err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!folio_test_uptodate(folio) && !err && offset == 0 && - (this_num == folio_size(folio) || file_size == end)) { - folio_zero_segment(folio, this_num, folio_size(folio)); + folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(num, folio_size(folio) - folio_offset); + + err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); + if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && + (nr_bytes == folio_size(folio) || file_size == end)) { + folio_zero_segment(folio, nr_bytes, folio_size(folio)); folio_mark_uptodate(folio); } folio_unlock(folio); @@ -1695,9 +1828,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (err) goto out_iput; - num -= this_num; - offset = 0; - index++; + pos += nr_bytes; + num -= nr_bytes; } err = 0; @@ -1706,8 +1838,6 @@ out_iput: iput(inode); out_up_killsb: up_read(&fc->killsb); -out_finish: - fuse_copy_finish(cs); return err; } @@ -1731,29 +1861,30 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, { int err; struct address_space *mapping = inode->i_mapping; - pgoff_t index; loff_t file_size; unsigned int num; unsigned int offset; size_t total_len = 0; - unsigned int num_pages, cur_pages = 0; + unsigned int num_pages; struct fuse_conn *fc = fm->fc; struct fuse_retrieve_args *ra; size_t args_size = sizeof(*ra); struct fuse_args_pages *ap; struct fuse_args *args; + loff_t pos = outarg->offset; - offset = outarg->offset & ~PAGE_MASK; + offset = offset_in_page(pos); file_size = i_size_read(inode); num = min(outarg->size, fc->max_write); - if (outarg->offset > file_size) + if (pos > file_size) num = 0; - else if (outarg->offset + num > file_size) - num = file_size - outarg->offset; + else if (num > file_size - pos) + num = file_size - pos; - num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); num_pages = min(num_pages, fc->max_pages); + num = min(num, num_pages << PAGE_SHIFT); args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0])); @@ -1772,27 +1903,27 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, args->in_pages = true; args->end = fuse_retrieve_end; - index = outarg->offset >> PAGE_SHIFT; - - while (num && cur_pages < num_pages) { + while (num && ap->num_folios < num_pages) { struct folio *folio; - unsigned int this_num; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; folio = filemap_get_folio(mapping, index); if (IS_ERR(folio)) break; - this_num = min_t(unsigned, num, PAGE_SIZE - offset); + folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(folio_size(folio) - folio_offset, num); + ap->folios[ap->num_folios] = folio; - ap->descs[ap->num_folios].offset = offset; - ap->descs[ap->num_folios].length = this_num; + ap->descs[ap->num_folios].offset = folio_offset; + ap->descs[ap->num_folios].length = nr_bytes; ap->num_folios++; - cur_pages++; - offset = 0; - num -= this_num; - total_len += this_num; - index++; + pos += nr_bytes; + num -= nr_bytes; + total_len += nr_bytes; } ra->inarg.offset = outarg->offset; ra->inarg.size = total_len; @@ -1817,16 +1948,18 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, u64 nodeid; int err; - err = -EINVAL; if (size != sizeof(outarg)) - goto copy_finish; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto copy_finish; + return err; fuse_copy_finish(cs); + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + down_read(&fc->killsb); err = -ENOENT; nodeid = outarg.nodeid; @@ -1839,10 +1972,6 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, up_read(&fc->killsb); return err; - -copy_finish: - fuse_copy_finish(cs); - return err; } /* @@ -1908,11 +2037,68 @@ static int fuse_notify_resend(struct fuse_conn *fc) return 0; } +/* + * Increments the fuse connection epoch. This will result of dentries from + * previous epochs to be invalidated. Additionally, if inval_wq is set, a work + * queue is scheduled to trigger the invalidation. + */ +static int fuse_notify_inc_epoch(struct fuse_conn *fc) +{ + atomic_inc(&fc->epoch); + if (inval_wq) + schedule_work(&fc->epoch_work); + + return 0; +} + +static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_prune_out outarg; + const unsigned int batch = 512; + u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); + unsigned int num, i; + int err; + + if (!nodeids) + return -ENOMEM; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != outarg.count * sizeof(u64)) + return -EINVAL; + + for (; outarg.count; outarg.count -= num) { + num = min(batch, outarg.count); + err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); + if (err) + return err; + + scoped_guard(rwsem_read, &fc->killsb) { + for (i = 0; i < num; i++) + fuse_try_prune_one_inode(fc, nodeids[i]); + } + } + return 0; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { - /* Don't try to move pages (yet) */ - cs->move_pages = 0; + /* + * Only allow notifications during while the connection is in an + * initialized and connected state + */ + if (!fc->initialized || !fc->connected) + return -EINVAL; + + /* Don't try to move folios (yet) */ + cs->move_folios = false; switch (code) { case FUSE_NOTIFY_POLL: @@ -1936,8 +2122,13 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_RESEND: return fuse_notify_resend(fc); + case FUSE_NOTIFY_INC_EPOCH: + return fuse_notify_inc_epoch(fc); + + case FUSE_NOTIFY_PRUNE: + return fuse_notify_prune(fc, size, cs); + default: - fuse_copy_finish(cs); return -EINVAL; } } @@ -2017,7 +2208,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, */ if (!oh.unique) { err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); - goto out; + goto copy_finish; } err = -EINVAL; @@ -2060,7 +2251,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, spin_unlock(&fpq->lock); cs->req = req; if (!req->args->page_replace) - cs->move_pages = 0; + cs->move_folios = false; if (oh.error) err = nbytes != sizeof(oh) ? -EINVAL : 0; @@ -2090,7 +2281,7 @@ copy_finish: static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; - struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp); + struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp); if (!fud) return -EPERM; @@ -2098,7 +2289,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) if (!user_backed_iter(from)) return -EINVAL; - fuse_copy_init(&cs, 0, from); + fuse_copy_init(&cs, false, from); return fuse_dev_do_write(fud, &cs, iov_iter_count(from)); } @@ -2112,11 +2303,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, unsigned idx; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_dev *fud; + struct fuse_dev *fud = __fuse_get_dev(out); size_t rem; ssize_t ret; - fud = fuse_get_dev(out); if (!fud) return -EPERM; @@ -2126,7 +2316,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, tail = pipe->tail; count = pipe_occupancy(head, tail); - bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL); + bufs = kvmalloc_objs(struct pipe_buffer, count); if (!bufs) { pipe_unlock(pipe); return -ENOMEM; @@ -2172,13 +2362,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, } pipe_unlock(pipe); - fuse_copy_init(&cs, 0, NULL); + fuse_copy_init(&cs, false, NULL); cs.pipebufs = bufs; cs.nr_segs = nbuf; cs.pipe = pipe; if (flags & SPLICE_F_MOVE) - cs.move_pages = 1; + cs.move_folios = true; ret = fuse_dev_do_write(fud, &cs, len); @@ -2202,7 +2392,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) struct fuse_iqueue *fiq; struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) + if (IS_ERR(fud)) return EPOLLERR; fiq = &fud->fc->iq; @@ -2255,7 +2445,7 @@ static void end_polls(struct fuse_conn *fc) * The same effect is usually achievable through killing the filesystem daemon * and all users of the filesystem. The exception is the combination of an * asynchronous request and the tricky deadlock (see - * Documentation/filesystems/fuse.rst). + * Documentation/filesystems/fuse/fuse.rst). * * Aborting requests under I/O goes as follows: 1: Separate out unlocked * requests, they should be finished off immediately. Locked requests will be @@ -2275,6 +2465,9 @@ void fuse_abort_conn(struct fuse_conn *fc) LIST_HEAD(to_end); unsigned int i; + if (fc->timeout.req_timeout) + cancel_delayed_work(&fc->timeout.work); + /* Background queuing checks fc->connected under bg_lock */ spin_lock(&fc->bg_lock); fc->connected = 0; @@ -2346,13 +2539,15 @@ void fuse_wait_aborted(struct fuse_conn *fc) int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_dev *fud = fuse_get_dev(file); + struct fuse_dev *fud = fuse_file_to_fud(file); + /* Pairs with cmpxchg() in fuse_dev_install() */ + struct fuse_conn *fc = xchg(&fud->fc, FUSE_DEV_FC_DISCONNECTED); - if (fud) { - struct fuse_conn *fc = fud->fc; + if (fc) { struct fuse_pqueue *fpq = &fud->pq; LIST_HEAD(to_end); unsigned int i; + bool last; spin_lock(&fpq->lock); WARN_ON(!list_empty(&fpq->io)); @@ -2362,13 +2557,19 @@ int fuse_dev_release(struct inode *inode, struct file *file) fuse_dev_end_requests(&to_end); + spin_lock(&fc->lock); + list_del(&fud->entry); /* Are we the last open device? */ - if (atomic_dec_and_test(&fc->dev_count)) { + last = list_empty(&fc->devices); + spin_unlock(&fc->lock); + + if (last) { WARN_ON(fc->iq.fasync != NULL); fuse_abort_conn(fc); } - fuse_dev_free(fud); + fuse_conn_put(fc); } + fuse_dev_put(fud); return 0; } EXPORT_SYMBOL_GPL(fuse_dev_release); @@ -2377,35 +2578,17 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) { struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); /* No locking - fasync_helper does its own locking */ return fasync_helper(fd, file, on, &fud->fc->iq.fasync); } -static int fuse_device_clone(struct fuse_conn *fc, struct file *new) -{ - struct fuse_dev *fud; - - if (new->private_data) - return -EINVAL; - - fud = fuse_dev_alloc_install(fc); - if (!fud) - return -ENOMEM; - - new->private_data = fud; - atomic_inc(&fc->dev_count); - - return 0; -} - static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) { - int res; int oldfd; - struct fuse_dev *fud = NULL; + struct fuse_dev *fud, *new_fud; if (get_user(oldfd, argp)) return -EFAULT; @@ -2418,17 +2601,20 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) * Check against file->f_op because CUSE * uses the same ioctl handler. */ - if (fd_file(f)->f_op == file->f_op) - fud = fuse_get_dev(fd_file(f)); + if (fd_file(f)->f_op != file->f_op) + return -EINVAL; - res = -EINVAL; - if (fud) { - mutex_lock(&fuse_mutex); - res = fuse_device_clone(fud->fc, file); - mutex_unlock(&fuse_mutex); - } + fud = fuse_get_dev(fd_file(f)); + if (IS_ERR(fud)) + return PTR_ERR(fud); - return res; + new_fud = fuse_file_to_fud(file); + if (fuse_dev_fc_get(new_fud)) + return -EINVAL; + + fuse_dev_install(new_fud, fud->fc); + + return 0; } static long fuse_dev_ioctl_backing_open(struct file *file, @@ -2437,8 +2623,8 @@ static long fuse_dev_ioctl_backing_open(struct file *file, struct fuse_dev *fud = fuse_get_dev(file); struct fuse_backing_map map; - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) return -EOPNOTSUPP; @@ -2454,8 +2640,8 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) struct fuse_dev *fud = fuse_get_dev(file); int backing_id; - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) return -EOPNOTSUPP; @@ -2466,6 +2652,20 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) return fuse_backing_close(fud->fc, backing_id); } +static long fuse_dev_ioctl_sync_init(struct file *file) +{ + int err = -EINVAL; + struct fuse_dev *fud = fuse_file_to_fud(file); + + mutex_lock(&fuse_mutex); + if (!fuse_dev_fc_get(fud)) { + fud->sync_init = true; + err = 0; + } + mutex_unlock(&fuse_mutex); + return err; +} + static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2481,11 +2681,25 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, case FUSE_DEV_IOC_BACKING_CLOSE: return fuse_dev_ioctl_backing_close(file, argp); + case FUSE_DEV_IOC_SYNC_INIT: + return fuse_dev_ioctl_sync_init(file); + default: return -ENOTTY; } } +#ifdef CONFIG_PROC_FS +static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file) +{ + struct fuse_dev *fud = __fuse_get_dev(file); + if (!fud) + return; + + seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev); +} +#endif + const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .open = fuse_dev_open, @@ -2501,6 +2715,9 @@ const struct file_operations fuse_dev_operations = { #ifdef CONFIG_FUSE_IO_URING .uring_cmd = fuse_uring_cmd, #endif +#ifdef CONFIG_PROC_FS + .show_fdinfo = fuse_dev_show_fdinfo, +#endif }; EXPORT_SYMBOL_GPL(fuse_dev_operations); |
