summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 08:50:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 08:50:57 -0700
commitda315f6e03988a7127680bbc26e1028991b899b8 (patch)
treefc4a41dbb22ac0a7a2f683dc5c5c6789957ce5e8
parent1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (diff)
parent543b8f8662fe6d21f19958b666ab0051af9db21a (diff)
downloadlwn-da315f6e03988a7127680bbc26e1028991b899b8.tar.gz
lwn-da315f6e03988a7127680bbc26e1028991b899b8.zip
Merge tag 'fuse-update-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: "The most interesting part of this update is user namespace support, mostly done by Eric Biederman. This enables safe unprivileged fuse mounts within a user namespace. There are also a couple of fixes for bugs found by syzbot and miscellaneous fixes and cleanups" * tag 'fuse-update-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: fuse: don't keep dead fuse_conn at fuse_fill_super(). fuse: fix control dir setup and teardown fuse: fix congested state leak on aborted connections fuse: Allow fully unprivileged mounts fuse: Ensure posix acls are translated outside of init_user_ns fuse: add writeback documentation fuse: honor AT_STATX_FORCE_SYNC fuse: honor AT_STATX_DONT_SYNC fuse: Restrict allow_other to the superblock's namespace or a descendant fuse: Support fuse filesystems outside of init_user_ns fuse: Fail all requests with invalid uids or gids fuse: Remove the buggy retranslation of pids in fuse_dev_do_read fuse: return -ECONNABORTED on /dev/fuse read after abort fuse: atomic_o_trunc should truncate pagecache
-rw-r--r--Documentation/filesystems/fuse-io.txt38
-rw-r--r--fs/fuse/acl.c4
-rw-r--r--fs/fuse/control.c15
-rw-r--r--fs/fuse/cuse.c11
-rw-r--r--fs/fuse/dev.c43
-rw-r--r--fs/fuse/dir.c45
-rw-r--r--fs/fuse/fuse_i.h15
-rw-r--r--fs/fuse/inode.c50
-rw-r--r--fs/fuse/xattr.c43
-rw-r--r--include/uapi/linux/fuse.h7
-rw-r--r--kernel/user_namespace.c1
11 files changed, 209 insertions, 63 deletions
diff --git a/Documentation/filesystems/fuse-io.txt b/Documentation/filesystems/fuse-io.txt
new file mode 100644
index 000000000000..07b8f73f100f
--- /dev/null
+++ b/Documentation/filesystems/fuse-io.txt
@@ -0,0 +1,38 @@
+Fuse supports the following I/O modes:
+
+- direct-io
+- cached
+ + write-through
+ + writeback-cache
+
+The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the
+FUSE_OPEN reply.
+
+In direct-io mode the page cache is completely bypassed for reads and writes.
+No read-ahead takes place. Shared mmap is disabled.
+
+In cached mode reads may be satisfied from the page cache, and data may be
+read-ahead by the kernel to fill the cache. The cache is always kept consistent
+after any writes to the file. All mmap modes are supported.
+
+The cached mode has two sub modes controlling how writes are handled. The
+write-through mode is the default and is supported on all kernels. The
+writeback-cache mode may be selected by the FUSE_WRITEBACK_CACHE flag in the
+FUSE_INIT reply.
+
+In write-through mode each write is immediately sent to userspace as one or more
+WRITE requests, as well as updating any cached pages (and caching previously
+uncached, but fully written pages). No READ requests are ever sent for writes,
+so when an uncached page is partially written, the page is discarded.
+
+In writeback-cache mode (enabled by the FUSE_WRITEBACK_CACHE flag) writes go to
+the cache only, which means that the write(2) syscall can often complete very
+fast. Dirty pages are written back implicitly (background writeback or page
+reclaim on memory pressure) or explicitly (invoked by close(2), fsync(2) and
+when the last ref to the file is being released on munmap(2)). This mode
+assumes that all changes to the filesystem go through the FUSE kernel module
+(size and atime/ctime/mtime attributes are kept up-to-date by the kernel), so
+it's generally not suitable for network filesystems. If a partial page is
+written, then the page needs to be first read from userspace. This means, that
+even for files opened for O_WRONLY it is possible that READ requests will be
+generated by the kernel.
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index ec85765502f1..5a48cee6d7d3 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -34,7 +34,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
return ERR_PTR(-ENOMEM);
size = fuse_getxattr(inode, name, value, PAGE_SIZE);
if (size > 0)
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ acl = posix_acl_from_xattr(fc->user_ns, value, size);
else if ((size == 0) || (size == -ENODATA) ||
(size == -EOPNOTSUPP && fc->no_getxattr))
acl = NULL;
@@ -81,7 +81,7 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (!value)
return -ENOMEM;
- ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ ret = posix_acl_to_xattr(fc->user_ns, acl, value, size);
if (ret < 0) {
kfree(value);
return ret;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index b9ea99c5b5b3..0b694655d988 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -35,7 +35,7 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
{
struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
if (fc) {
- fuse_abort_conn(fc);
+ fuse_abort_conn(fc, true);
fuse_conn_put(fc);
}
return count;
@@ -211,10 +211,11 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
if (!dentry)
return NULL;
- fc->ctl_dentry[fc->ctl_ndents++] = dentry;
inode = new_inode(fuse_control_sb);
- if (!inode)
+ if (!inode) {
+ dput(dentry);
return NULL;
+ }
inode->i_ino = get_next_ino();
inode->i_mode = mode;
@@ -228,6 +229,9 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
set_nlink(inode, nlink);
inode->i_private = fc;
d_add(dentry, inode);
+
+ fc->ctl_dentry[fc->ctl_ndents++] = dentry;
+
return dentry;
}
@@ -284,7 +288,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
for (i = fc->ctl_ndents - 1; i >= 0; i--) {
struct dentry *dentry = fc->ctl_dentry[i];
d_inode(dentry)->i_private = NULL;
- d_drop(dentry);
+ if (!i) {
+ /* Get rid of submounts: */
+ d_invalidate(dentry);
+ }
dput(dentry);
}
drop_nlink(d_inode(fuse_control_sb->s_root));
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index e9e97803442a..8f68181256c0 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,7 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
+#include <linux/user_namespace.h>
#include "fuse_i.h"
@@ -406,7 +407,7 @@ err_unlock:
err_region:
unregister_chrdev_region(devt, 1);
err:
- fuse_abort_conn(fc);
+ fuse_abort_conn(fc, false);
goto out;
}
@@ -498,7 +499,11 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
if (!cc)
return -ENOMEM;
- fuse_conn_init(&cc->fc);
+ /*
+ * Limit the cuse channel to requests that can
+ * be represented in file->f_cred->user_ns.
+ */
+ fuse_conn_init(&cc->fc, file->f_cred->user_ns);
fud = fuse_dev_alloc(&cc->fc);
if (!fud) {
@@ -581,7 +586,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
{
struct cuse_conn *cc = dev_get_drvdata(dev);
- fuse_abort_conn(&cc->fc);
+ fuse_abort_conn(&cc->fc, false);
return count;
}
static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5d06384c2cae..e03ca14f40e9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -112,13 +112,6 @@ static void __fuse_put_request(struct fuse_req *req)
refcount_dec(&req->count);
}
-static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
-{
- req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
- req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
- req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
-}
-
void fuse_set_initialized(struct fuse_conn *fc)
{
/* Make sure stores before this are seen on another CPU */
@@ -163,11 +156,19 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
goto out;
}
- fuse_req_init_context(fc, req);
+ req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
+ req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
+ req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+
__set_bit(FR_WAITING, &req->flags);
if (for_background)
__set_bit(FR_BACKGROUND, &req->flags);
+ if (unlikely(req->in.h.uid == ((uid_t)-1) ||
+ req->in.h.gid == ((gid_t)-1))) {
+ fuse_put_request(fc, req);
+ return ERR_PTR(-EOVERFLOW);
+ }
return req;
out:
@@ -256,7 +257,10 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
if (!req)
req = get_reserved_req(fc, file);
- fuse_req_init_context(fc, req);
+ req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
+ req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+ req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+
__set_bit(FR_WAITING, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
@@ -381,8 +385,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq);
- if (fc->num_background == fc->congestion_threshold &&
- fc->connected && fc->sb) {
+ if (fc->num_background == fc->congestion_threshold && fc->sb) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
}
@@ -1234,9 +1237,10 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
if (err)
goto err_unlock;
- err = -ENODEV;
- if (!fiq->connected)
+ if (!fiq->connected) {
+ err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
goto err_unlock;
+ }
if (!list_empty(&fiq->interrupts)) {
req = list_entry(fiq->interrupts.next, struct fuse_req,
@@ -1260,12 +1264,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
in = &req->in;
reqsize = in->h.len;
- if (task_active_pid_ns(current) != fc->pid_ns) {
- rcu_read_lock();
- in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
- rcu_read_unlock();
- }
-
/* If request is too large, reply with an error and restart the read */
if (nbytes < reqsize) {
req->out.h.error = -EIO;
@@ -1287,7 +1285,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
spin_lock(&fpq->lock);
clear_bit(FR_LOCKED, &req->flags);
if (!fpq->connected) {
- err = -ENODEV;
+ err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
goto out_end;
}
if (err) {
@@ -2076,7 +2074,7 @@ static void end_polls(struct fuse_conn *fc)
* is OK, the request will in that case be removed from the list before we touch
* it.
*/
-void fuse_abort_conn(struct fuse_conn *fc)
+void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
{
struct fuse_iqueue *fiq = &fc->iq;
@@ -2089,6 +2087,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
fc->connected = 0;
fc->blocked = 0;
+ fc->aborted = is_abort;
fuse_set_initialized(fc);
list_for_each_entry(fud, &fc->devices, entry) {
struct fuse_pqueue *fpq = &fud->pq;
@@ -2151,7 +2150,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
/* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) {
WARN_ON(fc->iq.fasync != NULL);
- fuse_abort_conn(fc);
+ fuse_abort_conn(fc, false);
}
fuse_dev_free(fud);
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 24967382a7b1..56231b31f806 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -858,8 +858,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink;
- stat->uid = make_kuid(&init_user_ns, attr->uid);
- stat->gid = make_kgid(&init_user_ns, attr->gid);
+ stat->uid = make_kuid(fc->user_ns, attr->uid);
+ stat->gid = make_kgid(fc->user_ns, attr->gid);
stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec;
@@ -924,12 +924,20 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
}
static int fuse_update_get_attr(struct inode *inode, struct file *file,
- struct kstat *stat)
+ struct kstat *stat, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
+ bool sync;
- if (time_before64(fi->i_time, get_jiffies_64())) {
+ if (flags & AT_STATX_FORCE_SYNC)
+ sync = true;
+ else if (flags & AT_STATX_DONT_SYNC)
+ sync = false;
+ else
+ sync = time_before64(fi->i_time, get_jiffies_64());
+
+ if (sync) {
forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else if (stat) {
@@ -943,7 +951,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
int fuse_update_attributes(struct inode *inode, struct file *file)
{
- return fuse_update_get_attr(inode, file, NULL);
+ return fuse_update_get_attr(inode, file, NULL, 0);
}
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
@@ -1030,7 +1038,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
const struct cred *cred;
if (fc->allow_other)
- return 1;
+ return current_in_userns(fc->user_ns);
cred = current_cred();
if (uid_eq(cred->euid, fc->user_id) &&
@@ -1475,17 +1483,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
return true;
}
-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
- bool trust_local_cmtime)
+static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
+ struct fuse_setattr_in *arg, bool trust_local_cmtime)
{
unsigned ivalid = iattr->ia_valid;
if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
if (ivalid & ATTR_UID)
- arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
+ arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
if (ivalid & ATTR_GID)
- arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
+ arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) {
@@ -1629,8 +1637,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
return err;
if (attr->ia_valid & ATTR_OPEN) {
- if (fc->atomic_o_trunc)
+ /* This is coming from open(..., ... | O_TRUNC); */
+ WARN_ON(!(attr->ia_valid & ATTR_SIZE));
+ WARN_ON(attr->ia_size != 0);
+ if (fc->atomic_o_trunc) {
+ /*
+ * No need to send request to userspace, since actual
+ * truncation has already been done by OPEN. But still
+ * need to truncate page cache.
+ */
+ i_size_write(inode, 0);
+ truncate_pagecache(inode, 0);
return 0;
+ }
file = NULL;
}
@@ -1646,7 +1665,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
- iattr_to_fattr(attr, &inarg, trust_local_cmtime);
+ iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
@@ -1783,7 +1802,7 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
if (!fuse_allow_current_process(fc))
return -EACCES;
- return fuse_update_get_attr(inode, NULL, stat);
+ return fuse_update_get_attr(inode, NULL, stat, flags);
}
static const struct inode_operations fuse_dir_inode_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c4c093bbf456..5256ad333b05 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -26,6 +26,7 @@
#include <linux/xattr.h>
#include <linux/pid_namespace.h>
#include <linux/refcount.h>
+#include <linux/user_namespace.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -466,6 +467,9 @@ struct fuse_conn {
/** The pid namespace for this mount */
struct pid_namespace *pid_ns;
+ /** The user namespace for this mount */
+ struct user_namespace *user_ns;
+
/** Maximum read size */
unsigned max_read;
@@ -515,6 +519,9 @@ struct fuse_conn {
abort and device release */
unsigned connected;
+ /** Connection aborted via sysfs */
+ bool aborted;
+
/** Connection failed (version mismatch). Cannot race with
setting other bitfields since it is only set once in INIT
reply, before any other request, and never cleared */
@@ -526,6 +533,9 @@ struct fuse_conn {
/** Do readpages asynchronously? Only set in INIT */
unsigned async_read:1;
+ /** Return an unique read error after abort. Only set in INIT */
+ unsigned abort_err:1;
+
/** Do not send separate SETATTR request before open(O_TRUNC) */
unsigned atomic_o_trunc:1;
@@ -851,7 +861,7 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
struct fuse_req *req);
/* Abort all requests */
-void fuse_abort_conn(struct fuse_conn *fc);
+void fuse_abort_conn(struct fuse_conn *fc, bool is_abort);
/**
* Invalidate inode attributes
@@ -870,7 +880,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
/**
* Initialize fuse_conn
*/
-void fuse_conn_init(struct fuse_conn *fc);
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
/**
* Release reference to fuse_conn
@@ -975,6 +985,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
int fuse_removexattr(struct inode *inode, const char *name);
extern const struct xattr_handler *fuse_xattr_handlers[];
extern const struct xattr_handler *fuse_acl_xattr_handlers[];
+extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
struct posix_acl;
struct posix_acl *fuse_get_acl(struct inode *inode, int type);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ef309958e060..ffcaf98044b9 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -171,8 +171,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
set_nlink(inode, attr->nlink);
- inode->i_uid = make_kuid(&init_user_ns, attr->uid);
- inode->i_gid = make_kgid(&init_user_ns, attr->gid);
+ inode->i_uid = make_kuid(fc->user_ns, attr->uid);
+ inode->i_gid = make_kgid(fc->user_ns, attr->gid);
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
@@ -371,7 +371,7 @@ void fuse_unlock_inode(struct inode *inode)
static void fuse_umount_begin(struct super_block *sb)
{
- fuse_abort_conn(get_fuse_conn_super(sb));
+ fuse_abort_conn(get_fuse_conn_super(sb), false);
}
static void fuse_send_destroy(struct fuse_conn *fc)
@@ -393,7 +393,7 @@ static void fuse_put_super(struct super_block *sb)
fuse_send_destroy(fc);
- fuse_abort_conn(fc);
+ fuse_abort_conn(fc, false);
mutex_lock(&fuse_mutex);
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
@@ -477,7 +477,8 @@ static int fuse_match_uint(substring_t *s, unsigned int *res)
return err;
}
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
+ struct user_namespace *user_ns)
{
char *p;
memset(d, 0, sizeof(struct fuse_mount_data));
@@ -513,7 +514,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
case OPT_USER_ID:
if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), uv);
+ d->user_id = make_kuid(user_ns, uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
@@ -522,7 +523,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
case OPT_GROUP_ID:
if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), uv);
+ d->group_id = make_kgid(user_ns, uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
@@ -565,8 +566,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
- seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
+ seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
+ seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
if (fc->default_permissions)
seq_puts(m, ",default_permissions");
if (fc->allow_other)
@@ -597,7 +598,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
fpq->connected = 1;
}
-void fuse_conn_init(struct fuse_conn *fc)
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
{
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
@@ -621,6 +622,7 @@ void fuse_conn_init(struct fuse_conn *fc)
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ fc->user_ns = get_user_ns(user_ns);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -630,6 +632,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
put_pid_ns(fc->pid_ns);
+ put_user_ns(fc->user_ns);
fc->release(fc);
}
}
@@ -918,6 +921,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->posix_acl = 1;
fc->sb->s_xattr = fuse_acl_xattr_handlers;
}
+ if (arg->flags & FUSE_ABORT_ERROR)
+ fc->abort_err = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -948,7 +953,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
- FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL;
+ FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
+ FUSE_ABORT_ERROR;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1061,7 +1067,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
- if (!parse_fuse_opt(data, &d, is_bdev))
+ if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
goto err;
if (is_bdev) {
@@ -1089,16 +1095,27 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!file)
goto err;
- if ((file->f_op != &fuse_dev_operations) ||
- (file->f_cred->user_ns != &init_user_ns))
+ /*
+ * Require mount to happen from the same user namespace which
+ * opened /dev/fuse to prevent potential attacks.
+ */
+ if (file->f_op != &fuse_dev_operations ||
+ file->f_cred->user_ns != sb->s_user_ns)
goto err_fput;
+ /*
+ * If we are not in the initial user namespace posix
+ * acls must be translated.
+ */
+ if (sb->s_user_ns != &init_user_ns)
+ sb->s_xattr = fuse_no_acl_xattr_handlers;
+
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
err = -ENOMEM;
if (!fc)
goto err_fput;
- fuse_conn_init(fc);
+ fuse_conn_init(fc, sb->s_user_ns);
fc->release = fuse_free_conn;
fud = fuse_dev_alloc(fc);
@@ -1179,6 +1196,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fuse_dev_free(fud);
err_put_conn:
fuse_conn_put(fc);
+ sb->s_fs_info = NULL;
err_fput:
fput(file);
err:
@@ -1208,7 +1226,7 @@ static void fuse_kill_sb_anon(struct super_block *sb)
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
- .fs_flags = FS_HAS_SUBTYPE,
+ .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
.mount = fuse_mount,
.kill_sb = fuse_kill_sb_anon,
};
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 3caac46b08b0..433717640f78 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -192,6 +192,26 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
return fuse_setxattr(inode, name, value, size, flags);
}
+static bool no_xattr_list(struct dentry *dentry)
+{
+ return false;
+}
+
+static int no_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+
+static int no_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *nodee,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
static const struct xattr_handler fuse_xattr_handler = {
.prefix = "",
.get = fuse_xattr_get,
@@ -209,3 +229,26 @@ const struct xattr_handler *fuse_acl_xattr_handlers[] = {
&fuse_xattr_handler,
NULL
};
+
+static const struct xattr_handler fuse_no_acl_access_xattr_handler = {
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .list = no_xattr_list,
+ .get = no_xattr_get,
+ .set = no_xattr_set,
+};
+
+static const struct xattr_handler fuse_no_acl_default_xattr_handler = {
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
+ .flags = ACL_TYPE_ACCESS,
+ .list = no_xattr_list,
+ .get = no_xattr_get,
+ .set = no_xattr_set,
+};
+
+const struct xattr_handler *fuse_no_acl_xattr_handlers[] = {
+ &fuse_no_acl_access_xattr_handler,
+ &fuse_no_acl_default_xattr_handler,
+ &fuse_xattr_handler,
+ NULL
+};
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 4b5001c57f46..92fa24c24c92 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -113,6 +113,9 @@
* 7.26
* - add FUSE_HANDLE_KILLPRIV
* - add FUSE_POSIX_ACL
+ *
+ * 7.27
+ * - add FUSE_ABORT_ERROR
*/
#ifndef _LINUX_FUSE_H
@@ -148,7 +151,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 26
+#define FUSE_KERNEL_MINOR_VERSION 27
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -245,6 +248,7 @@ struct fuse_file_lock {
* FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls
+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -267,6 +271,7 @@ struct fuse_file_lock {
#define FUSE_PARALLEL_DIROPS (1 << 18)
#define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20)
+#define FUSE_ABORT_ERROR (1 << 21)
/**
* CUSE INIT request/reply flags
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 246d4d4ce5c7..492c255e6c5a 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1235,6 +1235,7 @@ bool current_in_userns(const struct user_namespace *target_ns)
{
return in_userns(target_ns, current_user_ns());
}
+EXPORT_SYMBOL(current_in_userns);
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{