diff options
Diffstat (limited to 'fs')
313 files changed, 4141 insertions, 2639 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 4dac4a0dc5f4..c397c51f80d9 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -17,34 +17,64 @@ #include "v9fs_vfs.h" #include "fid.h" -static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) +static struct posix_acl *v9fs_fid_get_acl(struct p9_fid *fid, const char *name) { ssize_t size; void *value = NULL; struct posix_acl *acl = NULL; size = v9fs_fid_xattr_get(fid, name, NULL, 0); - if (size > 0) { - value = kzalloc(size, GFP_NOFS); - if (!value) - return ERR_PTR(-ENOMEM); - size = v9fs_fid_xattr_get(fid, name, value, size); - if (size > 0) { - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (IS_ERR(acl)) - goto err_out; - } - } else if (size == -ENODATA || size == 0 || - size == -ENOSYS || size == -EOPNOTSUPP) { - acl = NULL; - } else - acl = ERR_PTR(-EIO); - -err_out: + if (size < 0) + return ERR_PTR(size); + if (size == 0) + return ERR_PTR(-ENODATA); + + value = kzalloc(size, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); + + size = v9fs_fid_xattr_get(fid, name, value, size); + if (size < 0) + acl = ERR_PTR(size); + else if (size == 0) + acl = ERR_PTR(-ENODATA); + else + acl = posix_acl_from_xattr(&init_user_ns, value, size); kfree(value); return acl; } +static struct posix_acl *v9fs_acl_get(struct dentry *dentry, const char *name) +{ + struct p9_fid *fid; + struct posix_acl *acl = NULL; + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return ERR_CAST(fid); + + acl = v9fs_fid_get_acl(fid, name); + p9_fid_put(fid); + return acl; +} + +static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, const char *name) +{ + int retval; + struct posix_acl *acl = NULL; + + acl = v9fs_fid_get_acl(fid, name); + if (!IS_ERR(acl)) + return acl; + + retval = PTR_ERR(acl); + if (retval == -ENODATA || retval == -ENOSYS || retval == -EOPNOTSUPP) + return NULL; + + /* map everything else to -EIO */ + return ERR_PTR(-EIO); +} + int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) { int retval = 0; 
@@ -89,7 +119,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu) +struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu) { struct v9fs_session_info *v9ses; @@ -109,6 +139,112 @@ struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu) } +struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) +{ + struct v9fs_session_info *v9ses; + + v9ses = v9fs_dentry2v9ses(dentry); + /* We allow set/get/list of acl when access=client is not specified. */ + if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) + return v9fs_acl_get(dentry, posix_acl_xattr_name(type)); + return v9fs_get_cached_acl(d_inode(dentry), type); +} + +int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + struct posix_acl *acl, int type) +{ + int retval; + size_t size = 0; + void *value = NULL; + const char *acl_name; + struct v9fs_session_info *v9ses; + struct inode *inode = d_inode(dentry); + + if (acl) { + retval = posix_acl_valid(inode->i_sb->s_user_ns, acl); + if (retval) + goto err_out; + + size = posix_acl_xattr_size(acl->a_count); + + value = kzalloc(size, GFP_NOFS); + if (!value) { + retval = -ENOMEM; + goto err_out; + } + + retval = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (retval < 0) + goto err_out; + } + + /* + * set the attribute on the remote. Without even looking at the + * xattr value. 
We leave it to the server to validate + */ + acl_name = posix_acl_xattr_name(type); + v9ses = v9fs_dentry2v9ses(dentry); + if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { + retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); + goto err_out; + } + + if (S_ISLNK(inode->i_mode)) { + retval = -EOPNOTSUPP; + goto err_out; + } + + if (!inode_owner_or_capable(&init_user_ns, inode)) { + retval = -EPERM; + goto err_out; + } + + switch (type) { + case ACL_TYPE_ACCESS: + if (acl) { + struct iattr iattr = {}; + struct posix_acl *acl_mode = acl; + + retval = posix_acl_update_mode(&init_user_ns, inode, + &iattr.ia_mode, + &acl_mode); + if (retval) + goto err_out; + if (!acl_mode) { + /* + * ACL can be represented by the mode bits. + * So don't update ACL below. + */ + kfree(value); + value = NULL; + size = 0; + } + iattr.ia_valid = ATTR_MODE; + /* + * FIXME should we update ctime ? + * What is the following setxattr update the mode ? + */ + v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr); + } + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + retval = acl ? 
-EINVAL : 0; + goto err_out; + } + break; + } + + retval = v9fs_xattr_set(dentry, acl_name, value, size, 0); + if (!retval) + set_cached_acl(inode, type, acl); + +err_out: + kfree(value); + return retval; +} + static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl) { int retval; @@ -207,124 +343,3 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, *modep = mode; return 0; } - -static int v9fs_xattr_get_acl(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - struct v9fs_session_info *v9ses; - struct posix_acl *acl; - int error; - - v9ses = v9fs_dentry2v9ses(dentry); - /* - * We allow set/get/list of acl when access=client is not specified - */ - if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_xattr_get(dentry, handler->name, buffer, size); - - acl = v9fs_get_cached_acl(inode, handler->flags); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int v9fs_xattr_set_acl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - int retval; - struct posix_acl *acl; - struct v9fs_session_info *v9ses; - - v9ses = v9fs_dentry2v9ses(dentry); - /* - * set the attribute on the remote. Without even looking at the - * xattr value. 
We leave it to the server to validate - */ - if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_xattr_set(dentry, handler->name, value, size, - flags); - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - if (!inode_owner_or_capable(&init_user_ns, inode)) - return -EPERM; - if (value) { - /* update the cached acl value */ - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - else if (acl) { - retval = posix_acl_valid(inode->i_sb->s_user_ns, acl); - if (retval) - goto err_out; - } - } else - acl = NULL; - - switch (handler->flags) { - case ACL_TYPE_ACCESS: - if (acl) { - struct iattr iattr = { 0 }; - struct posix_acl *old_acl = acl; - - retval = posix_acl_update_mode(&init_user_ns, inode, - &iattr.ia_mode, &acl); - if (retval) - goto err_out; - if (!acl) { - /* - * ACL can be represented - * by the mode bits. So don't - * update ACL. - */ - posix_acl_release(old_acl); - value = NULL; - size = 0; - } - iattr.ia_valid = ATTR_MODE; - /* FIXME should we update ctime ? - * What is the following setxattr update the - * mode ? - */ - v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr); - } - break; - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) { - retval = acl ? 
-EINVAL : 0; - goto err_out; - } - break; - default: - BUG(); - } - retval = v9fs_xattr_set(dentry, handler->name, value, size, flags); - if (!retval) - set_cached_acl(inode, handler->flags, acl); -err_out: - posix_acl_release(acl); - return retval; -} - -const struct xattr_handler v9fs_xattr_acl_access_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = ACL_TYPE_ACCESS, - .get = v9fs_xattr_get_acl, - .set = v9fs_xattr_set_acl, -}; - -const struct xattr_handler v9fs_xattr_acl_default_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = v9fs_xattr_get_acl, - .set = v9fs_xattr_set_acl, -}; diff --git a/fs/9p/acl.h b/fs/9p/acl.h index ce5175d463dd..4c60a2bce5de 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -8,8 +8,12 @@ #ifdef CONFIG_9P_FS_POSIX_ACL int v9fs_get_acl(struct inode *inode, struct p9_fid *fid); -struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, +struct posix_acl *v9fs_iop_get_inode_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *v9fs_iop_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type); +int v9fs_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + struct posix_acl *acl, int type); int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid); int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid, struct posix_acl *dacl, struct posix_acl *acl); @@ -17,7 +21,9 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl); #else +#define v9fs_iop_get_inode_acl NULL #define v9fs_iop_get_acl NULL +#define v9fs_iop_set_acl NULL static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) { return 0; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 47b9a1122f34..a19891015f19 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -40,7 +40,7 @@ static void v9fs_issue_read(struct netfs_io_subrequest 
*subreq) size_t len = subreq->len - subreq->transferred; int total, err; - iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len); + iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len); total = p9_client_read(fid, pos, &to, &err); @@ -172,7 +172,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio) len = min_t(loff_t, i_size - start, len); - iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len); + iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 000fbaae9b18..3bb95adc9619 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -109,7 +109,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) struct iov_iter to; int n; - iov_iter_kvec(&to, READ, &kvec, 1, buflen); + iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); if (err) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5cfa4b4f070f..03c1743c4aff 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -983,14 +983,18 @@ const struct inode_operations v9fs_dir_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, + .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, + .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_file_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, + .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, + .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_symlink_inode_operations_dotl = { diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 1f9298a4bd42..b6984311e00a 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include 
<linux/sched.h> #include <linux/uio.h> +#include <linux/posix_acl_xattr.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -24,7 +25,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, struct iov_iter to; int err; - iov_iter_kvec(&to, READ, &kvec, 1, buffer_size); + iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buffer_size); attr_fid = p9_client_xattrwalk(fid, name, &attr_size); if (IS_ERR(attr_fid)) { @@ -109,7 +110,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, struct iov_iter from; int retval, err; - iov_iter_kvec(&from, WRITE, &kvec, 1, value_len); + iov_iter_kvec(&from, ITER_SOURCE, &kvec, 1, value_len); p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", name, value_len, flags); @@ -182,9 +183,9 @@ static struct xattr_handler v9fs_xattr_security_handler = { const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, &v9fs_xattr_trusted_handler, -#ifdef CONFIG_9P_FS_POSIX_ACL - &v9fs_xattr_acl_access_handler, - &v9fs_xattr_acl_default_handler, +#ifdef CONFIG_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, #endif #ifdef CONFIG_9P_FS_SECURITY &v9fs_xattr_security_handler, diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index 3e11fc3331eb..b5636e544c8a 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -11,8 +11,6 @@ #include <net/9p/client.h> extern const struct xattr_handler *v9fs_xattr_handlers[]; -extern const struct xattr_handler v9fs_xattr_acl_access_handler; -extern const struct xattr_handler v9fs_xattr_acl_default_handler; ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, void *buffer, size_t buffer_size); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 0a090d614e76..7dcd59693a0c 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -298,7 +298,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (call->count2 != call->count && call->count2 != 0) return afs_protocol_error(call, afs_eproto_cb_count); call->iter = 
&call->def_iter; - iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4); + iov_iter_discard(&call->def_iter, ITER_DEST, call->count2 * 3 * 4); call->unmarshall++; fallthrough; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 230c2d19116d..104df2964225 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -305,7 +305,7 @@ expand: req->actual_len = i_size; /* May change */ req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ req->data_version = dvnode->status.data_version; /* May change */ - iov_iter_xarray(&req->def_iter, READ, &dvnode->netfs.inode.i_mapping->i_pages, + iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages, 0, i_size); req->iter = &req->def_iter; diff --git a/fs/afs/file.c b/fs/afs/file.c index d1cfb235c4b9..2eeab57df133 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -324,7 +324,7 @@ static void afs_issue_read(struct netfs_io_subrequest *subreq) fsreq->vnode = vnode; fsreq->iter = &fsreq->def_iter; - iov_iter_xarray(&fsreq->def_iter, READ, + iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &fsreq->vnode->netfs.inode.i_mapping->i_pages, fsreq->pos, fsreq->len); @@ -346,7 +346,7 @@ static int afs_symlink_read_folio(struct file *file, struct folio *folio) fsreq->len = folio_size(folio); fsreq->vnode = vnode; fsreq->iter = &fsreq->def_iter; - iov_iter_xarray(&fsreq->def_iter, READ, &folio->mapping->i_pages, + iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages, fsreq->pos, fsreq->len); ret = afs_fetch_data(fsreq->vnode, fsreq); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; 
server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 723d162078a3..9ba7b68375c9 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1301,7 +1301,7 @@ static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t si call->iov_len = size; call->kvec[0].iov_base = buf; call->kvec[0].iov_len = size; - iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size); + iov_iter_kvec(&call->def_iter, ITER_DEST, call->kvec, 1, size); } static inline void afs_extract_to_tmp(struct afs_call *call) @@ -1319,7 +1319,7 @@ static inline void afs_extract_to_tmp64(struct afs_call *call) static inline void afs_extract_discard(struct afs_call *call, size_t size) { call->iov_len = size; - iov_iter_discard(&call->def_iter, READ, size); + iov_iter_discard(&call->def_iter, ITER_DEST, size); } static inline void afs_extract_to_buf(struct afs_call *call, size_t size) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index eccc3cd0cb70..c62939e5ea1f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -359,7 +359,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->write_iter ? 
MSG_MORE : 0); @@ -400,7 +400,7 @@ error_do_abort: RX_USER_ABORT, ret, "KSD"); } else { len = 0; - iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, &msg.msg_iter, &len, false, &call->abort_code, &call->service_id); @@ -485,7 +485,7 @@ static void afs_deliver_to_call(struct afs_call *call) ) { if (state == AFS_CALL_SV_AWAIT_ACK) { len = 0; - iov_iter_kvec(&call->def_iter, READ, NULL, 0, 0); + iov_iter_kvec(&call->def_iter, ITER_DEST, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, call->rxcall, &call->def_iter, &len, false, &remote_abort, @@ -822,7 +822,7 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, NULL, 0, 0); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -862,7 +862,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) diff --git a/fs/afs/write.c b/fs/afs/write.c index 9ebdd36eaf2f..08fd456dde67 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -609,7 +609,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, */ afs_write_to_cache(vnode, start, len, i_size, 
caching); - iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len); + iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len); ret = afs_store_data(vnode, &iter, start, false); } else { _debug("write discard %x @%llx [%llx]", len, start, i_size); @@ -1000,7 +1000,7 @@ int afs_launder_folio(struct folio *folio) bv[0].bv_page = &folio->page; bv[0].bv_offset = f; bv[0].bv_len = t - f; - iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len); + iov_iter_bvec(&iter, ITER_SOURCE, bv, 1, bv[0].bv_len); trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio); ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true); @@ -1552,7 +1552,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, if (unlikely(!file->f_op->read_iter)) return -EINVAL; - ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); + ret = aio_setup_rw(ITER_DEST, iocb, &iovec, vectored, compat, &iter); if (ret < 0) return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); @@ -1580,7 +1580,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, if (unlikely(!file->f_op->write_iter)) return -EINVAL; - ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); + ret = aio_setup_rw(ITER_SOURCE, iocb, &iovec, vectored, compat, &iter); if (ret < 0) return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 9d1cde8066cf..92737166203f 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -154,7 +154,7 @@ static int bad_inode_tmpfile(struct user_namespace *mnt_userns, } static int bad_inode_set_acl(struct user_namespace *mnt_userns, - struct inode *inode, struct posix_acl *acl, + struct dentry *dentry, struct posix_acl *acl, int type) { return -EIO; @@ -177,7 +177,7 @@ static const struct inode_operations bad_inode_ops = .setattr = bad_inode_setattr, .listxattr = bad_inode_listxattr, .get_link = bad_inode_get_link, - .get_acl = bad_inode_get_acl, + 
.get_inode_acl = bad_inode_get_acl, .fiemap = bad_inode_fiemap, .update_time = bad_inode_update_time, .atomic_open = bad_inode_atomic_open, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 63c7ebb0da89..de63572a9404 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } while (0) #ifdef ARCH_DLINFO - /* + /* * ARCH_DLINFO must come first so PPC can do its special alignment of * AUXV. * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in @@ -456,13 +456,13 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr) * * Loads ELF program headers from the binary file elf_file, which has the ELF * header pointed to by elf_ex, into a newly allocated array. The caller is - * responsible for freeing the allocated data. Returns an ERR_PTR upon failure. + * responsible for freeing the allocated data. Returns NULL upon failure. */ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, struct file *elf_file) { struct elf_phdr *elf_phdata = NULL; - int retval, err = -1; + int retval = -1; unsigned int size; /* @@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, /* Read in the program headers */ retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); - if (retval < 0) { - err = retval; - goto out; - } - /* Success! 
*/ - err = 0; out: - if (err) { + if (retval) { kfree(elf_phdata); elf_phdata = NULL; } @@ -911,7 +905,7 @@ static int load_elf_binary(struct linux_binprm *bprm) interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL); if (!interp_elf_ex) { retval = -ENOMEM; - goto out_free_ph; + goto out_free_file; } /* Get the exec headers */ @@ -1020,7 +1014,7 @@ out_free_interp: executable_stack); if (retval < 0) goto out_free_dentry; - + elf_bss = 0; elf_brk = 0; @@ -1043,7 +1037,7 @@ out_free_interp: if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; - + /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ @@ -1166,7 +1160,7 @@ out_free_interp: error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { - retval = IS_ERR((void *)error) ? + retval = IS_ERR_VALUE(error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } @@ -1251,7 +1245,7 @@ out_free_interp: interpreter, load_bias, interp_elf_phdata, &arch_state); - if (!IS_ERR((void *)elf_entry)) { + if (!IS_ERR_VALUE(elf_entry)) { /* * load_elf_interp() returns relocation * adjustment @@ -1260,7 +1254,7 @@ out_free_interp: elf_entry += interp_elf_ex->e_entry; } if (BAD_ADDR(elf_entry)) { - retval = IS_ERR((void *)elf_entry) ? + retval = IS_ERR_VALUE(elf_entry) ? 
(int)elf_entry : -EINVAL; goto out_free_dentry; } @@ -1354,6 +1348,7 @@ out: out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); +out_free_file: allow_write_access(interpreter); if (interpreter) fput(interpreter); @@ -1520,7 +1515,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) phdr->p_align = 0; } -static void fill_note(struct memelfnote *note, const char *name, int type, +static void fill_note(struct memelfnote *note, const char *name, int type, unsigned int sz, void *data) { note->name = name; @@ -1723,7 +1718,6 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm return 0; } -#ifdef CORE_DUMP_USE_REGSET #include <linux/regset.h> struct elf_thread_core_info { @@ -1744,6 +1738,7 @@ struct elf_note_info { int thread_notes; }; +#ifdef CORE_DUMP_USE_REGSET /* * When a regset has a writeback hook, we call it on each thread before * dumping user memory. On register window machines, this makes sure the @@ -1823,34 +1818,58 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, return 1; } +#else +static int fill_thread_core_info(struct elf_thread_core_info *t, + const struct user_regset_view *view, + long signr, struct elf_note_info *info) +{ + struct task_struct *p = t->task; + elf_fpregset_t *fpu; + + fill_prstatus(&t->prstatus.common, p, signr); + elf_core_copy_task_regs(p, &t->prstatus.pr_reg); + + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), + &(t->prstatus)); + info->size += notesize(&t->notes[0]); + + fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL); + if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) { + kfree(fpu); + return 1; + } + + t->prstatus.pr_fpvalid = 1; + fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu); + info->size += notesize(&t->notes[1]); + + return 1; +} +#endif static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, struct coredump_params *cprm) { struct task_struct *dump_task = current; - 
const struct user_regset_view *view = task_user_regset_view(dump_task); + const struct user_regset_view *view; struct elf_thread_core_info *t; struct elf_prpsinfo *psinfo; struct core_thread *ct; - unsigned int i; - - info->size = 0; - info->thread = NULL; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - if (psinfo == NULL) { - info->psinfo.data = NULL; /* So we don't free this wrongly */ + if (!psinfo) return 0; - } - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); +#ifdef CORE_DUMP_USE_REGSET + view = task_user_regset_view(dump_task); + /* * Figure out how many notes we're going to need for each thread. */ info->thread_notes = 0; - for (i = 0; i < view->n; ++i) + for (int i = 0; i < view->n; ++i) if (view->regsets[i].core_note_type != 0) ++info->thread_notes; @@ -1869,11 +1888,23 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, */ fill_elf_header(elf, phdrs, view->e_machine, view->e_flags); +#else + view = NULL; + info->thread_notes = 2; + fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); +#endif /* * Allocate a structure for each thread. */ - for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) { + info->thread = kzalloc(offsetof(struct elf_thread_core_info, + notes[info->thread_notes]), + GFP_KERNEL); + if (unlikely(!info->thread)) + return 0; + + info->thread->task = dump_task; + for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) { t = kzalloc(offsetof(struct elf_thread_core_info, notes[info->thread_notes]), GFP_KERNEL); @@ -1881,17 +1912,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, return 0; t->task = ct->task; - if (ct->task == dump_task || !info->thread) { - t->next = info->thread; - info->thread = t; - } else { - /* - * Make sure to keep the original task at - * the head of the list. 
- */ - t->next = info->thread->next; - info->thread->next = t; - } + t->next = info->thread->next; + info->thread->next = t; } /* @@ -1919,11 +1941,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, return 1; } -static size_t get_note_info_size(struct elf_note_info *info) -{ - return info->size; -} - /* * Write all the notes for each thread. When writing the first thread, the * process-wide notes are interleaved after the first thread-specific note. @@ -1978,197 +1995,6 @@ static void free_note_info(struct elf_note_info *info) kvfree(info->files.data); } -#else - -/* Here is the structure in which status of each thread is captured. */ -struct elf_thread_status -{ - struct list_head list; - struct elf_prstatus prstatus; /* NT_PRSTATUS */ - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct task_struct *thread; - struct memelfnote notes[3]; - int num_notes; -}; - -/* - * In order to add the specific thread information for the elf file format, - * we need to keep a linked list of every threads pr_status and then create - * a single section for them in the final core file. 
- */ -static int elf_dump_thread_status(long signr, struct elf_thread_status *t) -{ - int sz = 0; - struct task_struct *p = t->thread; - t->num_notes = 0; - - fill_prstatus(&t->prstatus.common, p, signr); - elf_core_copy_task_regs(p, &t->prstatus.pr_reg); - - fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), - &(t->prstatus)); - t->num_notes++; - sz += notesize(&t->notes[0]); - - if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, - &t->fpu))) { - fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), - &(t->fpu)); - t->num_notes++; - sz += notesize(&t->notes[1]); - } - return sz; -} - -struct elf_note_info { - struct memelfnote *notes; - struct memelfnote *notes_files; - struct elf_prstatus *prstatus; /* NT_PRSTATUS */ - struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ - struct list_head thread_list; - elf_fpregset_t *fpu; - user_siginfo_t csigdata; - int thread_status_size; - int numnote; -}; - -static int elf_note_info_init(struct elf_note_info *info) -{ - memset(info, 0, sizeof(*info)); - INIT_LIST_HEAD(&info->thread_list); - - /* Allocate space for ELF notes */ - info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL); - if (!info->notes) - return 0; - info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); - if (!info->psinfo) - return 0; - info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); - if (!info->prstatus) - return 0; - info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); - if (!info->fpu) - return 0; - return 1; -} - -static int fill_note_info(struct elfhdr *elf, int phdrs, - struct elf_note_info *info, - struct coredump_params *cprm) -{ - struct core_thread *ct; - struct elf_thread_status *ets; - - if (!elf_note_info_init(info)) - return 0; - - for (ct = current->signal->core_state->dumper.next; - ct; ct = ct->next) { - ets = kzalloc(sizeof(*ets), GFP_KERNEL); - if (!ets) - return 0; - - ets->thread = ct->task; - list_add(&ets->list, &info->thread_list); - } - - list_for_each_entry(ets, 
&info->thread_list, list) { - int sz; - - sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets); - info->thread_status_size += sz; - } - /* now collect the dump for the current */ - memset(info->prstatus, 0, sizeof(*info->prstatus)); - fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo); - elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs); - - /* Set up header */ - fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); - - /* - * Set up the notes in similar form to SVR4 core dumps made - * with info from their /proc. - */ - - fill_note(info->notes + 0, "CORE", NT_PRSTATUS, - sizeof(*info->prstatus), info->prstatus); - fill_psinfo(info->psinfo, current->group_leader, current->mm); - fill_note(info->notes + 1, "CORE", NT_PRPSINFO, - sizeof(*info->psinfo), info->psinfo); - - fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo); - fill_auxv_note(info->notes + 3, current->mm); - info->numnote = 4; - - if (fill_files_note(info->notes + info->numnote, cprm) == 0) { - info->notes_files = info->notes + info->numnote; - info->numnote++; - } - - /* Try to dump the FPU. 
*/ - info->prstatus->pr_fpvalid = - elf_core_copy_task_fpregs(current, cprm->regs, info->fpu); - if (info->prstatus->pr_fpvalid) - fill_note(info->notes + info->numnote++, - "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); - return 1; -} - -static size_t get_note_info_size(struct elf_note_info *info) -{ - int sz = 0; - int i; - - for (i = 0; i < info->numnote; i++) - sz += notesize(info->notes + i); - - sz += info->thread_status_size; - - return sz; -} - -static int write_note_info(struct elf_note_info *info, - struct coredump_params *cprm) -{ - struct elf_thread_status *ets; - int i; - - for (i = 0; i < info->numnote; i++) - if (!writenote(info->notes + i, cprm)) - return 0; - - /* write out the thread status notes section */ - list_for_each_entry(ets, &info->thread_list, list) { - for (i = 0; i < ets->num_notes; i++) - if (!writenote(&ets->notes[i], cprm)) - return 0; - } - - return 1; -} - -static void free_note_info(struct elf_note_info *info) -{ - while (!list_empty(&info->thread_list)) { - struct list_head *tmp = info->thread_list.next; - list_del(tmp); - kfree(list_entry(tmp, struct elf_thread_status, list)); - } - - /* Free data possibly allocated by fill_files_note(): */ - if (info->notes_files) - kvfree(info->notes_files->data); - - kfree(info->prstatus); - kfree(info->psinfo); - kfree(info->notes); - kfree(info->fpu); -} - -#endif - static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, elf_addr_t e_shoff, int segs) { @@ -2232,7 +2058,7 @@ static int elf_core_dump(struct coredump_params *cprm) /* Write notes phdr entry */ { - size_t sz = get_note_info_size(&info); + size_t sz = info.size; /* For cell spufs */ sz += elf_coredump_extra_notes_size(); @@ -2294,7 +2120,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!write_note_info(&info, cprm)) goto end_coredump; diff --git 
a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 08d0c8797828..096e3520a0b1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->start_stack = current->mm->start_brk + stack_size; #endif - if (create_elf_fdpic_tables(bprm, current->mm, - &exec_params, &interp_params) < 0) + retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, + &interp_params); + if (retval < 0) goto error; kdebug("- start_code %lx", current->mm->start_code); @@ -1603,7 +1604,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!writenote(thread_list->notes, cprm)) goto end_coredump; if (!writenote(&psinfo_note, cprm)) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e1eae7ea823a..bb202ad369d5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -44,10 +44,10 @@ static LIST_HEAD(entries); static int enabled = 1; enum {Enabled, Magic}; -#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) -#define MISC_FMT_OPEN_BINARY (1 << 30) -#define MISC_FMT_CREDENTIALS (1 << 29) -#define MISC_FMT_OPEN_FILE (1 << 28) +#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) +#define MISC_FMT_OPEN_BINARY (1UL << 30) +#define MISC_FMT_CREDENTIALS (1UL << 29) +#define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 548d6a5477b4..1e47b3ec3989 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -110,10 +110,11 @@ out: return ret; } -int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int ret; + struct inode *inode = d_inode(dentry); umode_t old_mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c 
index dce3a16996b9..18374a6d05bd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -288,8 +289,10 @@ static void prelim_release(struct preftree *preftree) struct prelim_ref *ref, *next_ref; rbtree_postorder_for_each_entry_safe(ref, next_ref, - &preftree->root.rb_root, rbnode) + &preftree->root.rb_root, rbnode) { + free_inode_elem_list(ref->inode_list); free_pref(ref); + } preftree->root = RB_ROOT_CACHED; preftree->count = 0; @@ -647,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node) return (struct extent_inode_elem *)(uintptr_t)node->aux; } +static void free_leaf_list(struct ulist *ulist) +{ + struct ulist_node *node; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(ulist, &uiter))) + free_inode_elem_list(unode_aux_to_inode_list(node)); + + ulist_free(ulist); +} + /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not @@ -761,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, cond_resched(); } out: - ulist_free(parents); + /* + * We may have inode lists attached to refs in the parents ulist, so we + * must free them before freeing the ulist and its refs. 
+ */ + free_leaf_list(parents); return ret; } @@ -820,16 +839,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -855,10 +869,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; @@ -884,13 +904,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. 
+ * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. */ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -920,7 +949,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1023,7 +1052,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1033,6 +1063,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1122,7 +1153,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1354,6 +1386,12 @@ again: if (ret < 0) goto out; ref->inode_list = eie; + /* + * We transferred the list ownership to the ref, + * so set to NULL to avoid a double free in case + * an error happens after this. 
+ */ + eie = NULL; } ret = ulist_add_merge_ptr(refs, ref->parent, ref->inode_list, @@ -1379,6 +1417,14 @@ again: eie->next = ref->inode_list; } eie = NULL; + /* + * We have transferred the inode list ownership from + * this ref to the ref we added to the 'refs' ulist. + * So set this ref's inode list to NULL to avoid + * use-after-free when our caller uses it or double + * frees in case an error happens before we return. + */ + ref->inode_list = NULL; } cond_resched(); } @@ -1395,24 +1441,6 @@ out: return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = unode_aux_to_inode_list(node); - free_inode_elem_list(eie); - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. key_list_head will point to a list of corresponding keys (caller must @@ -1522,6 +1550,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache { struct btrfs_backref_shared_cache_entry *entry; + if (!cache->use_cache) + return false; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return false; @@ -1557,6 +1588,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache return false; *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. 
+ */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + cache->entries[i].is_shared = true; + cache->entries[i].gen = entry->gen; + } + } return true; } @@ -1573,6 +1617,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, struct btrfs_backref_shared_cache_entry *entry; u64 gen; + if (!cache->use_cache) + return; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return; @@ -1648,6 +1695,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; int level; @@ -1669,6 +1717,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); + cache->use_cache = true; while (1) { bool is_shared; bool cached; @@ -1698,6 +1747,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, extent_gen > btrfs_root_last_snapshot(&root->root_item)) break; + /* + * If our data extent was not directly shared (without multiple + * reference items), than it might have a single reference item + * with a count > 1 for the same offset, which means there are 2 + * (or more) file extent items that point to the data extent - + * this happens when a file extent item needs to be split and + * then one item gets moved to another leaf due to a b+tree leaf + * split when inserting some item. In this case the file extent + * items may be located in different leaves and therefore some + * of the leaves may be referenced through shared subtrees while + * others are not. Since our extent buffer cache only works for + * a single path (by far the most common case and simpler to + * deal with), we can not use it if we have multiple leaves + * (which implies multiple paths). 
+ */ + if (level == -1 && tmp->nnodes > 1) + cache->use_cache = false; + if (level >= 0) store_backref_shared_cache(cache, root, bytenr, level, false); @@ -1713,6 +1780,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; } shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 52ae6957b414..8e69584d538d 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache { * a given data extent should never exceed the maximum b+tree height. */ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; + bool use_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 32c415cfbdfe..deebc8ddbd93 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - /* REVIEW */ if (wait && caching_ctl) ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); - /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */ if (caching_ctl) btrfs_put_caching_control(caching_ctl); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f1f051ad3147..e6635fe70067 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -512,7 +512,7 @@ static u64 bio_end_offset(struct bio *bio) static noinline int add_ra_bio_pages(struct inode *inode, u64 compressed_end, struct compressed_bio *cb, - unsigned long *pflags) + int *memstall, unsigned long *pflags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); unsigned long end_index; @@ -581,8 +581,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, continue; } - if (PageWorkingset(page)) + if (!*memstall && PageWorkingset(page)) { psi_memstall_enter(pflags); + *memstall = 1; + } 
ret = set_page_extent_mapped(page); if (ret < 0) { @@ -670,8 +672,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - /* Initialize to 1 to make skip psi_memstall_leave unless needed */ - unsigned long pflags = 1; + unsigned long pflags; + int memstall = 0; blk_status_t ret; int ret2; int i; @@ -727,7 +729,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, goto fail; } - add_ra_bio_pages(inode, em_start + em_len, cb, &pflags); + add_ra_bio_pages(inode, em_start + em_len, cb, &memstall, &pflags); /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; @@ -807,7 +809,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } } - if (!pflags) + if (memstall) psi_memstall_leave(&pflags); if (refcount_dec_and_test(&cb->pending_ios)) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b39b339fbf96..dcb510f38dda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -114,6 +114,22 @@ noinline void btrfs_release_path(struct btrfs_path *p) } /* + * We want the transaction abort to print stack trace only for errors where the + * cause could be a bug, eg. due to ENOSPC, and not for common errors that are + * caused by external factors. + */ +bool __cold abort_should_print_stack(int errno) +{ + switch (errno) { + case -EIO: + case -EROFS: + case -ENOMEM: + return false; + } + return true; +} + +/* * safely gets a reference on the root node of a tree. A lock * is not taken, so a concurrent writer may put a different node * at the root of the tree. See btrfs_lock_root_node for the @@ -4647,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int i; - ASSERT(!path->nowait); + /* + * The nowait semantics are used only for write paths, where we don't + * use the tree mod log and sequence numbers. 
+ */ + if (time_seq) + ASSERT(!path->nowait); nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) @@ -4667,7 +4688,14 @@ again: if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; - down_read(&fs_info->commit_root_sem); + if (path->nowait) { + if (!down_read_trylock(&fs_info->commit_root_sem)) { + ret = -EAGAIN; + goto done; + } + } else { + down_read(&fs_info->commit_root_sem); + } } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } @@ -4743,7 +4771,7 @@ again: next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4753,6 +4781,10 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && path->nowait) { + ret = -EAGAIN; + goto done; + } if (!ret && time_seq) { /* * If we don't get the lock, we may be racing @@ -4783,7 +4815,7 @@ again: ret = read_block_for_search(root, path, &next, level, 0, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4791,8 +4823,16 @@ again: goto done; } - if (!path->skip_locking) - btrfs_tree_read_lock(next); + if (!path->skip_locking) { + if (path->nowait) { + if (!btrfs_try_tree_read_lock(next)) { + ret = -EAGAIN; + goto done; + } + } else { + btrfs_tree_read_lock(next); + } + } } ret = 0; done: diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 727595eee973..919670d35919 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded); -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + 
size_t done_before); extern const struct dentry_operations btrfs_dentry_operations; @@ -3793,9 +3796,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, const char *function, unsigned int line, int errno, bool first_hit); +bool __cold abort_should_print_stack(int errno); + /* * Call btrfs_abort_transaction as early as possible when an error condition is - * detected, that way the exact line number is reported. + * detected, that way the exact stack trace is reported for some errors. */ #define btrfs_abort_transaction(trans, errno) \ do { \ @@ -3804,10 +3809,11 @@ do { \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ &((trans)->fs_info->fs_state))) { \ first = true; \ - if ((errno) != -EIO && (errno) != -EROFS) { \ - WARN(1, KERN_DEBUG \ + if (WARN(abort_should_print_stack(errno), \ + KERN_DEBUG \ "BTRFS: Transaction aborted (error %d)\n", \ - (errno)); \ + (errno))) { \ + /* Stack trace printed. */ \ } else { \ btrfs_debug((trans)->fs_info, \ "Transaction aborted (error %d)", \ @@ -3987,7 +3993,7 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); -int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2da9313c694..d99bf7c64611 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -166,11 +166,9 @@ static bool btrfs_supported_super_csum(u16 csum_type) * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. 
*/ -static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, - char *raw_disk_sb) +int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + const struct btrfs_super_block *disk_sb) { - struct btrfs_super_block *disk_sb = - (struct btrfs_super_block *)raw_disk_sb; char result[BTRFS_CSUM_SIZE]; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); @@ -181,7 +179,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is * filled with zeros and is included in the checksum. */ - crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE, + crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); if (memcmp(disk_sb->csum, result, fs_info->csum_size)) @@ -2553,7 +2551,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) fs_info->dev_root = root; } /* Initialize fs_info for all devices in any case */ - btrfs_init_devices_late(fs_info); + ret = btrfs_init_devices_late(fs_info); + if (ret) + goto out; /* * This tree can share blocks with some other fs tree during relocation @@ -3479,7 +3479,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 
*/ - if (btrfs_check_super_csum(fs_info, (u8 *)disk_super)) { + if (btrfs_check_super_csum(fs_info, disk_super)) { btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; btrfs_release_disk_super(disk_super); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c67c15d4d20b..9fa923e005a3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -42,6 +42,8 @@ struct extent_buffer *btrfs_find_create_tree_block( void btrfs_clean_tree_block(struct extent_buffer *buf); void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info); int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info); +int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + const struct btrfs_super_block *disk_sb); int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 1d4c2397d0d6..fab7eb76e53b 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, } struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation, + u64 root_objectid, u64 generation, int check_generation) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h index f32f4113c976..5afb7ca42828 100644 --- a/fs/btrfs/export.h +++ b/fs/btrfs/export.h @@ -19,7 +19,7 @@ struct btrfs_fid { } __attribute__ ((packed)); struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation, + u64 root_objectid, u64 generation, int check_generation); struct dentry *btrfs_get_parent(struct dentry *child); diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 618275af19c4..83cb0378096f 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int err; u64 failed_start; - while (1) { + err = 
__set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, + cached_state, NULL, GFP_NOFS); + while (err == -EEXIST) { + if (failed_start != start) + clear_extent_bit(tree, start, failed_start - 1, + EXTENT_LOCKED, cached_state); + + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, cached_state, NULL, GFP_NOFS); - if (err == -EEXIST) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else - break; - WARN_ON(start > end); } return err; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd2d36580f1a..2801c991814f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3295,21 +3295,22 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, } /* - * If this is a leaf and there are tree mod log users, we may - * have recorded mod log operations that point to this leaf. - * So we must make sure no one reuses this leaf's extent before - * mod log operations are applied to a node, otherwise after - * rewinding a node using the mod log operations we get an - * inconsistent btree, as the leaf's extent may now be used as - * a node or leaf for another different btree. + * If there are tree mod log users we may have recorded mod log + * operations for this node. If we re-allocate this node we + * could replay operations on this node that happened when it + * existed in a completely different root. For example if it + * was part of root A, then was reallocated to root B, and we + * are doing a btrfs_old_search_slot(root b), we could replay + * operations that happened when the block was part of root A, + * giving us an inconsistent view of the btree. 
+ * * We are safe from races here because at this point no other * node or root points to this extent buffer, so if after this - * check a new tree mod log user joins, it will not be able to - * find a node pointing to this leaf and record operations that - * point to this leaf. + * check a new tree mod log user joins we will not have an + * existing log of operations on this node that we have to + * contend with. */ - if (btrfs_header_level(buf) == 0 && - test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) + if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) must_pin = true; if (must_pin || btrfs_is_zoned(fs_info)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 176b432035ae..d01631d47806 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1598,14 +1598,19 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, write_bytes); else btrfs_check_nocow_unlock(BTRFS_I(inode)); + + if (nowait && ret == -ENOSPC) + ret = -EAGAIN; break; } release_bytes = reserve_bytes; again: ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags); - if (ret) + if (ret) { + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); break; + } /* * This is going to setup the pages array with the number of @@ -1765,6 +1770,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) loff_t endbyte; ssize_t err; unsigned int ilock_flags = 0; + struct iomap_dio *dio; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; @@ -1825,11 +1831,22 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ -again: from->nofault = true; - err = btrfs_dio_rw(iocb, from, written); + dio = btrfs_dio_write(iocb, from, written); from->nofault = false; + /* + * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync + * iocb, and that needs to lock the inode. 
So unlock it before calling + * iomap_dio_complete() to avoid a deadlock. + */ + btrfs_inode_unlock(inode, ilock_flags); + + if (IS_ERR_OR_NULL(dio)) + err = PTR_ERR_OR_ZERO(dio); + else + err = iomap_dio_complete(dio); + /* No increment (+=) because iomap returns a cumulative value. */ if (err > 0) written = err; @@ -1855,12 +1872,10 @@ again: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto again; + goto relock; } } - btrfs_inode_unlock(inode, ilock_flags); - /* * If 'err' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. @@ -4035,7 +4050,7 @@ again: */ pagefault_disable(); to->nofault = true; - ret = btrfs_dio_rw(iocb, to, read); + ret = btrfs_dio_read(iocb, to, read); to->nofault = false; pagefault_enable(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b0807c59e321..5a54bb93c413 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5256,7 +5256,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr err = btrfs_dirty_inode(inode); if (!err && attr->ia_valid & ATTR_MODE) - err = posix_acl_chmod(mnt_userns, inode, inode->i_mode); + err = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); } return err; @@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, */ status = BLK_STS_RESOURCE; dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS); - if (!dip) + if (!dip->csums) goto out_err; status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums); @@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = { .bio_set = &btrfs_dio_bioset, }; -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { struct btrfs_dio_data data; return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC, - &data, done_before); + IOMAP_DIO_PARTIAL, &data, 
done_before); +} + +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before) +{ + struct btrfs_dio_data data; + + return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + IOMAP_DIO_PARTIAL, &data, done_before); } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -11288,7 +11296,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { .mknod = btrfs_mknod, .listxattr = btrfs_listxattr, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, + .get_inode_acl = btrfs_get_acl, .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, .tmpfile = btrfs_tmpfile, @@ -11341,7 +11349,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .listxattr = btrfs_listxattr, .permission = btrfs_permission, .fiemap = btrfs_fiemap, - .get_acl = btrfs_get_acl, + .get_inode_acl = btrfs_get_acl, .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, .fileattr_get = btrfs_fileattr_get, @@ -11352,7 +11360,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .setattr = btrfs_setattr, .permission = btrfs_permission, .listxattr = btrfs_listxattr, - .get_acl = btrfs_get_acl, + .get_inode_acl = btrfs_get_acl, .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5dd8bed1488..f897be9ec1e9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; @@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -3205,7 +3209,6 @@ out: } kfree(rootrefs); - btrfs_free_path(path); return ret; } @@ -4231,6 
+4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { @@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); - inodes = NULL; - goto out; + goto out_loi; } + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); @@ -5283,7 +5286,7 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp, goto out_acct; } - ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), + ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) goto out_acct; @@ -5382,7 +5385,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool if (args.len > args.unencoded_len - args.unencoded_offset) goto out_acct; - ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), + ret = import_iovec(ITER_SOURCE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) goto out_acct; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e54f8280031f..100d9f4836b1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -761,11 +761,11 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) struct btrfs_ordered_extent *ordered; if (start + len < 
start) { - orig_end = INT_LIMIT(loff_t); + orig_end = OFFSET_MAX; } else { orig_end = start + len - 1; - if (orig_end > INT_LIMIT(loff_t)) - orig_end = INT_LIMIT(loff_t); + if (orig_end > OFFSET_MAX) + orig_end = OFFSET_MAX; } /* start IO across the range first to instantiate any delalloc diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9334c3157c22..b74105a10f16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, - "unable to update quota limit for %llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f6395e8288d6..82c8e991300e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1632,10 +1632,8 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio) int ret; ret = alloc_rbio_parity_pages(rbio); - if (ret) { - __free_raid_bio(rbio); + if (ret) return ret; - } ret = lock_stripe_add(rbio); if (ret == 0) @@ -1823,8 +1821,10 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc) */ if (rbio_is_full(rbio)) { ret = full_stripe_write(rbio); - if (ret) + if (ret) { + __free_raid_bio(rbio); goto fail; + } return; } @@ -1838,8 +1838,10 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc) list_add_tail(&rbio->plug_list, &plug->rbio_list); } else { ret = __raid56_parity_write(rbio); - if (ret) + if (ret) { + __free_raid_bio(rbio); goto fail; + } } return; @@ -2742,8 +2744,10 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc) rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { - BUG(); - kfree(rbio); + btrfs_warn_rl(fs_info, + "can not determine the failed stripe 
number for full stripe %llu", + bioc->raid_map[0]); + __free_raid_bio(rbio); return NULL; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f260c53829e5..196c4c6ed1ed 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2672,17 +2672,11 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, u8 csum[BTRFS_CSUM_SIZE]; u32 blocksize; - /* - * Block size determines how many scrub_block will be allocated. Here - * we use BTRFS_STRIPE_LEN (64KiB) as default limit, so we won't - * allocate too many scrub_block, while still won't cause too large - * bios for large extents. - */ if (flags & BTRFS_EXTENT_FLAG_DATA) { if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) blocksize = map->stripe_len; else - blocksize = BTRFS_STRIPE_LEN; + blocksize = sctx->fs_info->sectorsize; spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed++; sctx->stat.data_bytes_scrubbed += len; @@ -3917,7 +3911,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) { if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) { - spin_unlock(&cache->lock); btrfs_put_block_group(cache); goto skip; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ef4167072b8..1c4b693ee4a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd) switch (sctx->proto) { case 1: return cmd <= BTRFS_SEND_C_MAX_V1; case 2: return cmd <= BTRFS_SEND_C_MAX_V2; + case 3: return cmd <= BTRFS_SEND_C_MAX_V3; default: return false; } } @@ -5701,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5758,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, if (key.offset >= clone_src_i_size) break; - if (key.offset + 
ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5820,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * can not clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. That will make the + * receiver fallback to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due decompression and + * possible re-compression at the receiver. + */ + break; } else { ret = send_extent_data(sctx, dst_path, offset, clone_len); @@ -6469,7 +6492,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret < 0) goto out; } - if (sctx->cur_inode_needs_verity) { + + if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY) + && sctx->cur_inode_needs_verity) { ret = process_verity(sctx); if (ret < 0) goto out; @@ -6665,17 +6690,19 @@ static int changed_inode(struct send_ctx *sctx, /* * First, process the inode as if it was deleted. 
*/ - sctx->cur_inode_gen = right_gen; - sctx->cur_inode_new = false; - sctx->cur_inode_deleted = true; - sctx->cur_inode_size = btrfs_inode_size( - sctx->right_path->nodes[0], right_ii); - sctx->cur_inode_mode = btrfs_inode_mode( - sctx->right_path->nodes[0], right_ii); - ret = process_all_refs(sctx, - BTRFS_COMPARE_TREE_DELETED); - if (ret < 0) - goto out; + if (old_nlinks > 0) { + sctx->cur_inode_gen = right_gen; + sctx->cur_inode_new = false; + sctx->cur_inode_deleted = true; + sctx->cur_inode_size = btrfs_inode_size( + sctx->right_path->nodes[0], right_ii); + sctx->cur_inode_mode = btrfs_inode_mode( + sctx->right_path->nodes[0], right_ii); + ret = process_all_refs(sctx, + BTRFS_COMPARE_TREE_DELETED); + if (ret < 0) + goto out; + } /* * Now process the inode as if it was new. diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 0a4537775e0c..f7585cfa7e52 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -10,7 +10,12 @@ #include <linux/types.h> #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +/* Conditional support for the upcoming protocol version. */ +#ifdef CONFIG_BTRFS_DEBUG +#define BTRFS_SEND_STREAM_VERSION 3 +#else #define BTRFS_SEND_STREAM_VERSION 2 +#endif /* * In send stream v1, no command is larger than 64K. In send stream v2, no limit diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9be4fd2db0f4..5942b9384088 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2555,6 +2555,7 @@ static int check_dev_super(struct btrfs_device *dev) { struct btrfs_fs_info *fs_info = dev->fs_info; struct btrfs_super_block *sb; + u16 csum_type; int ret = 0; /* This should be called with fs still frozen. */ @@ -2569,6 +2570,21 @@ static int check_dev_super(struct btrfs_device *dev) if (IS_ERR(sb)) return PTR_ERR(sb); + /* Verify the checksum. 
*/ + csum_type = btrfs_super_csum_type(sb); + if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { + btrfs_err(fs_info, "csum type changed, has %u expect %u", + csum_type, btrfs_super_csum_type(fs_info->super_copy)); + ret = -EUCLEAN; + goto out; + } + + if (btrfs_check_super_csum(fs_info, sb)) { + btrfs_err(fs_info, "csum for on-disk super block no longer matches"); + ret = -EUCLEAN; + goto out; + } + /* Btrfs_validate_super() includes fsid check against super->fsid. */ ret = btrfs_validate_super(fs_info, sb, 0); if (ret < 0) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 699b54b3acaa..74fef1f49c35 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 9c478fa256f6..d43cb5242fec 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -200,7 +200,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) void btrfs_free_dummy_root(struct btrfs_root *root) { - if (!root) + if (IS_ERR_OR_NULL(root)) return; /* Will be freed by btrfs_free_fs_roots */ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index eee1e4459541..63676ea19f29 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root, */ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if 
(ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } + /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */ + old_roots = NULL; + new_roots = NULL; + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, nodesize, nodesize)) { test_err("qgroup counts didn't match expected values"); return -EINVAL; } - old_roots = NULL; - new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } ret = remove_extent_item(root, nodesize, nodesize); - if (ret) + if (ret) { + ulist_free(old_roots); return -EINVAL; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return 
ret; } ret = add_tree_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } ret = remove_extent_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 813986e38258..c3cf3dabe0b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, u64 *last_old_dentry_offset) { struct btrfs_root *log = inode->root->log_root; - struct extent_buffer *src = path->nodes[0]; - const int nritems = btrfs_header_nritems(src); + struct extent_buffer *src; + const int nritems = btrfs_header_nritems(path->nodes[0]); const u64 ino = btrfs_ino(inode); bool last_found = false; int batch_start = 0; int batch_size = 0; int i; - for (i = path->slots[0]; i < nritems; i++) { + /* + * We need to clone the leaf, release the read lock on it, and use the + * clone before modifying the log tree. See the comment at copy_items() + * about why we need to do this. 
+ */ + src = btrfs_clone_extent_buffer(path->nodes[0]); + if (!src) + return -ENOMEM; + + i = path->slots[0]; + btrfs_release_path(path); + path->nodes[0] = src; + path->slots[0] = i; + + for (; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; - struct extent_buffer *src = src_path->nodes[0]; + struct extent_buffer *src; int ret = 0; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); + /* + * To keep lockdep happy and avoid deadlocks, clone the source leaf and + * use the clone. This is because otherwise we would be changing the log + * tree, to insert items from the subvolume tree or insert csum items, + * while holding a read lock on a leaf from the subvolume tree, which + * creates a nasty lock dependency when COWing log tree nodes/leaves: + * + * 1) Modifying the log tree triggers an extent buffer allocation while + * holding a write lock on a parent extent buffer from the log tree. + * Allocating the pages for an extent buffer, or the extent buffer + * struct, can trigger inode eviction and finally the inode eviction + * will trigger a release/remove of a delayed node, which requires + * taking the delayed node's mutex; + * + * 2) Allocating a metadata extent for a log tree can trigger the async + * reclaim thread and make us wait for it to release enough space and + * unblock our reservation ticket. 
The reclaim thread can start + * flushing delayed items, and that in turn results in the need to + * lock delayed node mutexes and in the need to write lock extent + * buffers of a subvolume tree - all this while holding a write lock + * on the parent extent buffer in the log tree. + * + * So one task in scenario 1) running in parallel with another task in + * scenario 2) could lead to a deadlock, one wanting to lock a delayed + * node mutex while having a read lock on a leaf from the subvolume, + * while the other is holding the delayed node's mutex and wants to + * write lock the same subvolume leaf for flushing delayed items. + */ + src = btrfs_clone_extent_buffer(src_path->nodes[0]); + if (!src) + return -ENOMEM; + + i = src_path->slots[0]; + btrfs_release_path(src_path); + src_path->nodes[0] = src; + src_path->slots[0] = i; + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 94ba46d57920..635f45f1a2ef 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1011,6 +1011,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) rcu_assign_pointer(device->name, name); } + if (orig_dev->zone_info) { + struct btrfs_zoned_device_info *zone_info; + + zone_info = btrfs_clone_dev_zone_info(orig_dev); + if (!zone_info) { + btrfs_free_device(device); + ret = -ENOMEM; + goto error; + } + device->zone_info = zone_info; + } + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -6918,18 +6930,18 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, const struct btrfs_device *device) { - ASSERT((args->devid != (u64)-1) || args->missing); + if (args->missing) { + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && + !device->bdev) + return true; + return false; + 
} - if ((args->devid != (u64)-1) && device->devid != args->devid) + if (device->devid != args->devid) return false; if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0) return false; - if (!args->missing) - return true; - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && - !device->bdev) - return true; - return false; + return true; } /* @@ -7142,6 +7154,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, u64 devid; u64 type; u8 uuid[BTRFS_UUID_SIZE]; + int index; int num_stripes; int ret; int i; @@ -7149,6 +7162,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); type = btrfs_chunk_type(leaf, chunk); + index = btrfs_bg_flags_to_raid_index(type); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); #if BITS_PER_LONG == 32 @@ -7202,7 +7216,15 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, map->io_align = btrfs_chunk_io_align(leaf, chunk); map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = type; - map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + /* + * We can't use the sub_stripes value, as for profiles other than + * RAID10, they may have 0 as sub_stripes for filesystems created by + * older mkfs (<v5.4). + * In that case, it can cause divide-by-zero errors later. + * Since currently sub_stripes is fixed for each profile, let's + * use the trusted value instead. 
+ */ + map->sub_stripes = btrfs_raid_array[index].sub_stripes; map->verified_stripes = 0; em->orig_block_len = btrfs_calc_stripe_length(em); for (i = 0; i < num_stripes; i++) { @@ -7734,10 +7756,11 @@ error: return ret; } -void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) +int btrfs_init_devices_late(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; struct btrfs_device *device; + int ret = 0; fs_devices->fs_info = fs_info; @@ -7746,12 +7769,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) device->fs_info = fs_info; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { - list_for_each_entry(device, &seed_devs->devices, dev_list) + list_for_each_entry(device, &seed_devs->devices, dev_list) { device->fs_info = fs_info; + ret = btrfs_get_dev_zone_info(device, false); + if (ret) + break; + } seed_devs->fs_info = fs_info; } mutex_unlock(&fs_devices->device_list_mutex); + + return ret; } static u64 btrfs_dev_stats_value(const struct extent_buffer *eb, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 599b9d5af349..099def5613b8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -395,6 +395,7 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio); */ struct btrfs_bio { unsigned int mirror_num; + struct bvec_iter iter; /* for direct I/O */ u64 file_offset; @@ -403,7 +404,6 @@ struct btrfs_bio { struct btrfs_device *device; u8 *csum; u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; - struct bvec_iter iter; /* End I/O information supplied to btrfs_bio_alloc */ btrfs_bio_end_io_t end_io; @@ -671,7 +671,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats); -void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); +int btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int 
btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e2d073b08a7d..c9e2b0c85309 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, super[i] = page_address(page[i]); } - if (super[0]->generation > super[1]->generation) + if (btrfs_super_generation(super[0]) > + btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; @@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; } - zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; @@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } - kfree(zones); + kvfree(zones); switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: @@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) return 0; out: - kfree(zones); + kvfree(zones); out_free_zone_info: btrfs_destroy_dev_zone_info(device); @@ -639,6 +640,46 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) device->zone_info = NULL; } +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev) +{ + struct btrfs_zoned_device_info *zone_info; + + zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); + if (!zone_info) + return NULL; + + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->seq_zones) + goto out; + + bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, + zone_info->nr_zones); + + zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if 
(!zone_info->empty_zones) + goto out; + + bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, + zone_info->nr_zones); + + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->active_zones) + goto out; + + bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, + zone_info->nr_zones); + zone_info->zone_cache = NULL; + + return zone_info; + +out: + bitmap_free(zone_info->seq_zones); + bitmap_free(zone_info->empty_zones); + bitmap_free(zone_info->active_zones); + kfree(zone_info); + return NULL; +} + int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) { diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index e17462db3a84..8bd16d40b7c6 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -36,6 +36,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info); int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache); void btrfs_destroy_dev_zone_info(struct btrfs_device *device); +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev); int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info); int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info); int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, @@ -103,6 +104,16 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device, static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { } +/* + * In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never call + * into btrfs_clone_dev_zone_info() so it's safe to return NULL here. 
+ */ +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info( + struct btrfs_device *orig_dev) +{ + return NULL; +} + static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info) { if (!btrfs_is_zoned(fs_info)) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index f4fc8e0b847c..c7e8dd5b58d4 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -85,13 +85,14 @@ retry: return acl; } -int ceph_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ceph_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int ret = 0, size = 0; const char *name = NULL; char *value = NULL; struct iattr newattrs; + struct inode *inode = d_inode(dentry); struct timespec64 old_ctime = inode->i_ctime; umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index dcf701b05cc1..61f47debec5a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -288,7 +288,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) } len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); - iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); + iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); if (err == 0) err = -EFAULT; @@ -327,7 +327,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) } dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); - iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); + iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); if (err < 0) { dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index fb023f9fafcb..e54814d0c2f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ 
-2248,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; - unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2267,27 +2266,23 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) spin_unlock(&ci->i_unsafe_lock); /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + if (req1 || req2) { struct ceph_mds_request *req; + struct ceph_mds_session **sessions; + struct ceph_mds_session *s; + unsigned int max_sessions; int i; + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { + mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } @@ -2299,16 +2294,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2321,16 +2306,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - 
kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2342,11 +2317,12 @@ retry: /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds]) + sessions[s->s_mds] = ceph_get_mds_session(s); } spin_unlock(&ci->i_ceph_lock); + mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ for (i = 0; i < max_sessions; i++) { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e7e2ebac330d..6c7026cc8988 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -2033,7 +2033,7 @@ const struct inode_operations ceph_dir_iops = { .getattr = ceph_getattr, .setattr = ceph_setattr, .listxattr = ceph_listxattr, - .get_acl = ceph_get_acl, + .get_inode_acl = ceph_get_acl, .set_acl = ceph_set_acl, .mknod = ceph_mknod, .symlink = ceph_symlink, diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 04fd34557de8..6f9580defb2b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1161,7 +1161,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) aio_req->total_len = rc + zlen; } - iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs, + iov_iter_bvec(&i, ITER_DEST, osd_data->bvec_pos.bvecs, osd_data->num_bvecs, len); iov_iter_advance(&i, rc); iov_iter_zero(zlen, &i); @@ -1400,7 +1400,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int zlen = min_t(size_t, len - ret, size - pos - ret); - iov_iter_bvec(&i, READ, bvecs, num_pages, len); + iov_iter_bvec(&i, ITER_DEST, bvecs, num_pages, len); iov_iter_advance(&i, ret); iov_iter_zero(zlen, &i); ret += zlen; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4af5e55abc15..f23c5a6edc6f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -126,7 +126,7 @@ const struct inode_operations ceph_file_iops = { .setattr = ceph_setattr, .getattr = ceph_getattr, .listxattr = ceph_listxattr, - 
.get_acl = ceph_get_acl, + .get_inode_acl = ceph_get_acl, .set_acl = ceph_set_acl, }; @@ -362,7 +362,7 @@ static int ceph_fill_fragtree(struct inode *inode, if (nsplits != ci->i_fragtree_nsplits) { update = true; } else if (nsplits) { - i = prandom_u32_max(nsplits); + i = get_random_u32_below(nsplits); id = le32_to_cpu(fragtree->splits[i].frag); if (!__ceph_find_frag(ci, id)) update = true; @@ -2255,7 +2255,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, err = __ceph_setattr(inode, attr); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&init_user_ns, inode, attr->ia_mode); + err = posix_acl_chmod(&init_user_ns, dentry, attr->ia_mode); return err; } @@ -2492,7 +2492,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, struct inode *parent; parent = ceph_lookup_inode(sb, ceph_ino(inode)); - if (!parent) + if (IS_ERR(parent)) return PTR_ERR(parent); pci = ceph_inode(parent); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 3e2843e86e27..f3b461c708a8 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -364,7 +364,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) *fcntl_count = 0; *flock_count = 0; - ctx = inode->i_flctx; + ctx = locks_inode_context(inode); if (ctx) { spin_lock(&ctx->flc_lock); list_for_each_entry(lock, &ctx->flc_posix, fl_list) @@ -418,7 +418,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int num_fcntl_locks, int num_flock_locks) { struct file_lock *lock; - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); int err = 0; int seen_fcntl = 0; int seen_flock = 0; diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 3fbabc98e1f7..7dac21ee6ce7 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -29,7 +29,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) return -1; /* pick */ - n = prandom_u32_max(n); + n = get_random_u32_below(n); 
for (j = 0, i = 0; i < m->possible_max_rank; i++) { if (CEPH_MDS_IS_READY(i, ignore_laggy)) j++; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 864cdaa0d2bd..e4151852184e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ - struct ceph_snap_realm *realm = NULL; + struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; int rebuild_snapcs; @@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, dout("%s deletion=%d\n", __func__, deletion); more: + realm = NULL; rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 40630e6f691c..50e57a1fa32f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1117,7 +1117,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx); struct posix_acl *ceph_get_acl(struct inode *, int, bool); int ceph_set_acl(struct user_namespace *mnt_userns, - struct inode *inode, struct posix_acl *acl, int type); + struct dentry *dentry, struct posix_acl *acl, int type); int ceph_pre_init_acls(struct inode *dir, umode_t *mode, struct ceph_acl_sec_ctx *as_ctx); void ceph_init_inode_acls(struct inode *inode, diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c index fe88b67c863f..60399081046a 100644 --- a/fs/cifs/cached_dir.c +++ b/fs/cifs/cached_dir.c @@ -253,8 +253,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, dentry = dget(cifs_sb->root); else { dentry = path_to_dentry(cifs_sb, path); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + rc = -ENOENT; goto oshr_free; + } } cfid->dentry = dentry; cfid->tcon = tcon; @@ -338,6 +340,27 @@ smb2_close_cached_fid(struct kref *ref) free_cached_dir(cfid); } +void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon 
*tcon, + const char *name, struct cifs_sb_info *cifs_sb) +{ + struct cached_fid *cfid = NULL; + int rc; + + rc = open_cached_dir(xid, tcon, name, cifs_sb, true, &cfid); + if (rc) { + cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name); + return; + } + spin_lock(&cfid->cfids->cfid_list_lock); + if (cfid->has_lease) { + cfid->has_lease = false; + kref_put(&cfid->refcount, smb2_close_cached_fid); + } + spin_unlock(&cfid->cfids->cfid_list_lock); + close_cached_dir(cfid); +} + + void close_cached_dir(struct cached_fid *cfid) { kref_put(&cfid->refcount, smb2_close_cached_fid); @@ -378,22 +401,20 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) { struct cached_fids *cfids = tcon->cfids; struct cached_fid *cfid, *q; - struct list_head entry; + LIST_HEAD(entry); - INIT_LIST_HEAD(&entry); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - list_del(&cfid->entry); - list_add(&cfid->entry, &entry); + list_move(&cfid->entry, &entry); cfids->num_entries--; cfid->is_open = false; + cfid->on_list = false; /* To prevent race with smb2_cached_lease_break() */ kref_get(&cfid->refcount); } spin_unlock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &entry, entry) { - cfid->on_list = false; list_del(&cfid->entry); cancel_work_sync(&cfid->lease_break); if (cfid->has_lease) { @@ -518,15 +539,13 @@ struct cached_fids *init_cached_dirs(void) void free_cached_dirs(struct cached_fids *cfids) { struct cached_fid *cfid, *q; - struct list_head entry; + LIST_HEAD(entry); - INIT_LIST_HEAD(&entry); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { cfid->on_list = false; cfid->is_open = false; - list_del(&cfid->entry); - list_add(&cfid->entry, &entry); + list_move(&cfid->entry, &entry); } spin_unlock(&cfids->cfid_list_lock); diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h index e536304ca2ce..2f4e764c9ca9 100644 --- a/fs/cifs/cached_dir.h +++ b/fs/cifs/cached_dir.h @@ -69,6 
+69,10 @@ extern int open_cached_dir_by_dentry(struct cifs_tcon *tcon, struct dentry *dentry, struct cached_fid **cfid); extern void close_cached_dir(struct cached_fid *cfid); +extern void drop_cached_dir_by_name(const unsigned int xid, + struct cifs_tcon *tcon, + const char *name, + struct cifs_sb_info *cifs_sb); extern void close_all_cached_dirs(struct cifs_sb_info *cifs_sb); extern void invalidate_all_cached_dirs(struct cifs_tcon *tcon); extern int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fa480d62f313..c647f0d56518 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -13,6 +13,9 @@ #include <linux/string.h> #include <linux/keyctl.h> #include <linux/key-type.h> +#include <uapi/linux/posix_acl.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> #include <keys/user-type.h> #include "cifspdu.h" #include "cifsglob.h" @@ -20,6 +23,8 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "fs_context.h" +#include "cifs_fs_sb.h" +#include "cifs_unicode.h" /* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { @@ -1668,3 +1673,137 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, kfree(pntsd); return rc; } + +struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) +{ +#if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX) + struct posix_acl *acl = NULL; + ssize_t rc = -EOPNOTSUPP; + unsigned int xid; + struct super_block *sb = dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *pTcon; + const char *full_path; + void *page; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return ERR_CAST(tlink); + pTcon = tlink_tcon(tlink); + + xid = get_xid(); + page = alloc_dentry_path(); + + full_path = build_path_from_dentry(dentry, page); + if (IS_ERR(full_path)) { + acl = 
ERR_CAST(full_path); + goto out; + } + + /* return alt name if available as pseudo attr */ + switch (type) { + case ACL_TYPE_ACCESS: + if (sb->s_flags & SB_POSIXACL) + rc = cifs_do_get_acl(xid, pTcon, full_path, &acl, + ACL_TYPE_ACCESS, + cifs_sb->local_nls, + cifs_remap(cifs_sb)); + break; + + case ACL_TYPE_DEFAULT: + if (sb->s_flags & SB_POSIXACL) + rc = cifs_do_get_acl(xid, pTcon, full_path, &acl, + ACL_TYPE_DEFAULT, + cifs_sb->local_nls, + cifs_remap(cifs_sb)); + break; + } + + if (rc < 0) { + if (rc == -EINVAL) + acl = ERR_PTR(-EOPNOTSUPP); + else + acl = ERR_PTR(rc); + } + +out: + free_dentry_path(page); + free_xid(xid); + cifs_put_tlink(tlink); + return acl; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +int cifs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + struct posix_acl *acl, int type) +{ +#if defined(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) && defined(CONFIG_CIFS_POSIX) + int rc = -EOPNOTSUPP; + unsigned int xid; + struct super_block *sb = dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *pTcon; + const char *full_path; + void *page; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + pTcon = tlink_tcon(tlink); + + xid = get_xid(); + page = alloc_dentry_path(); + + full_path = build_path_from_dentry(dentry, page); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + goto out; + } + + if (!acl) + goto out; + + /* return dos attributes as pseudo xattr */ + /* return alt name if available as pseudo attr */ + + /* if proc/fs/cifs/streamstoxattr is set then + search server for EAs or streams to + returns as xattrs */ + if (posix_acl_xattr_size(acl->a_count) > CIFSMaxBufSize) { + cifs_dbg(FYI, "size of EA value too large\n"); + rc = -EOPNOTSUPP; + goto out; + } + + switch (type) { + case ACL_TYPE_ACCESS: + if (sb->s_flags & SB_POSIXACL) + rc = cifs_do_set_acl(xid, pTcon, full_path, acl, + ACL_TYPE_ACCESS, + cifs_sb->local_nls, + 
cifs_remap(cifs_sb)); + break; + + case ACL_TYPE_DEFAULT: + if (sb->s_flags & SB_POSIXACL) + rc = cifs_do_set_acl(xid, pTcon, full_path, acl, + ACL_TYPE_DEFAULT, + cifs_sb->local_nls, + cifs_remap(cifs_sb)); + break; + } + +out: + free_dentry_path(page); + free_xid(xid); + cifs_put_tlink(tlink); + return rc; +#else + return -EOPNOTSUPP; +#endif +} diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c6ac19223ddc..040267ed8a64 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1133,6 +1133,8 @@ const struct inode_operations cifs_dir_inode_ops = { .symlink = cifs_symlink, .mknod = cifs_mknod, .listxattr = cifs_listxattr, + .get_acl = cifs_get_acl, + .set_acl = cifs_set_acl, }; const struct inode_operations cifs_file_inode_ops = { @@ -1141,10 +1143,36 @@ const struct inode_operations cifs_file_inode_ops = { .permission = cifs_permission, .listxattr = cifs_listxattr, .fiemap = cifs_fiemap, + .get_acl = cifs_get_acl, + .set_acl = cifs_set_acl, }; +const char *cifs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + char *target_path; + + target_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!target_path) + return ERR_PTR(-ENOMEM); + + spin_lock(&inode->i_lock); + if (likely(CIFS_I(inode)->symlink_target)) { + strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX); + } else { + kfree(target_path); + target_path = ERR_PTR(-EOPNOTSUPP); + } + spin_unlock(&inode->i_lock); + + if (!IS_ERR(target_path)) + set_delayed_call(done, kfree_link, target_path); + + return target_path; +} + const struct inode_operations cifs_symlink_inode_ops = { - .get_link = simple_get_link, + .get_link = cifs_get_link, .permission = cifs_permission, .listxattr = cifs_listxattr, }; @@ -1257,7 +1285,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = filemap_write_and_wait_range(src_inode->i_mapping, off, off + len - 1); if (rc) - goto out; + goto unlock; /* should we flush first and last page first */ 
truncate_inode_pages(&target_inode->i_data, 0); @@ -1273,6 +1301,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * that target is updated on the server */ CIFS_I(target_inode)->time = 0; + +unlock: /* although unlocking in the reverse order from locking is not * strictly necessary here it is a little cleaner to be consistent */ @@ -1302,8 +1332,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, ssize_t rc; struct cifsFileInfo *cfile = dst_file->private_data; - if (cfile->swapfile) - return -EOPNOTSUPP; + if (cfile->swapfile) { + rc = -EOPNOTSUPP; + free_xid(xid); + return rc; + } rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 5b4a7a32bdc5..388b745a978e 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 39 -#define CIFS_VERSION "2.39" +#define SMB3_PRODUCT_BUILD 40 +#define CIFS_VERSION "2.40" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 83e83d8beabb..f50f96e4ec30 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -224,6 +224,10 @@ extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, const char *, u32 *, u32); extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, const struct cifs_fid *, u32 *, u32); +extern struct posix_acl *cifs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type); +extern int cifs_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, struct posix_acl *acl, int type); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); @@ -537,14 +541,14 @@ extern int CIFSSMBGetCIFSACL(const unsigned 
int xid, struct cifs_tcon *tcon, __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16, struct cifs_ntsd *, __u32, int); -extern int CIFSSMBGetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - char *acl_inf, const int buflen, const int acl_type, - const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *fileName, - const char *local_acl, const int buflen, const int acl_type, - const struct nls_table *nls_codepage, int remap_special_chars); +extern int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *searchName, + struct posix_acl **acl, const int acl_type, + const struct nls_table *nls_codepage, int remap); +extern int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *fileName, + const struct posix_acl *acl, const int acl_type, + const struct nls_table *nls_codepage, int remap); extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, const int netfid, __u64 *pExtAttrBits, __u64 *pMask); #endif /* CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1724066c1536..23f10e0d6e7e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2914,32 +2914,57 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, #ifdef CONFIG_CIFS_POSIX -/*Convert an Access Control Entry from wire format to local POSIX xattr format*/ -static void cifs_convert_ace(struct posix_acl_xattr_entry *ace, - struct cifs_posix_ace *cifs_ace) +#ifdef CONFIG_FS_POSIX_ACL +/** + * cifs_init_posix_acl - convert ACL from cifs to POSIX ACL format + * @ace: POSIX ACL entry to store converted ACL into + * @cifs_ace: ACL in cifs format + * + * Convert an Access Control Entry from wire format to local POSIX xattr + * format. 
+ * + * Note that the @cifs_uid member is used to store both {g,u}id_t. + */ +static void cifs_init_posix_acl(struct posix_acl_entry *ace, + struct cifs_posix_ace *cifs_ace) { /* u8 cifs fields do not need le conversion */ - ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm); - ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag); - ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid)); -/* - cifs_dbg(FYI, "perm %d tag %d id %d\n", - ace->e_perm, ace->e_tag, ace->e_id); -*/ + ace->e_perm = cifs_ace->cifs_e_perm; + ace->e_tag = cifs_ace->cifs_e_tag; + switch (ace->e_tag) { + case ACL_USER: + ace->e_uid = make_kuid(&init_user_ns, + le64_to_cpu(cifs_ace->cifs_uid)); + break; + case ACL_GROUP: + ace->e_gid = make_kgid(&init_user_ns, + le64_to_cpu(cifs_ace->cifs_uid)); + break; + } return; } -/* Convert ACL from CIFS POSIX wire format to local Linux POSIX ACL xattr */ -static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, - const int acl_type, const int size_of_data_area) +/** + * cifs_to_posix_acl - copy cifs ACL format to POSIX ACL format + * @acl: ACLs returned in POSIX ACL format + * @src: ACLs in cifs format + * @acl_type: type of POSIX ACL requested + * @size_of_data_area: size of SMB we got + * + * This function converts ACLs from cifs format to POSIX ACL format. + * If @acl is NULL then the size of the buffer required to store POSIX ACLs in + * their uapi format is returned. 
+ */ +static int cifs_to_posix_acl(struct posix_acl **acl, char *src, + const int acl_type, const int size_of_data_area) { int size = 0; - int i; __u16 count; struct cifs_posix_ace *pACE; struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)src; - struct posix_acl_xattr_header *local_acl = (void *)trgt; + struct posix_acl *kacl = NULL; + struct posix_acl_entry *pa, *pe; if (le16_to_cpu(cifs_acl->version) != CIFS_ACL_VERSION) return -EOPNOTSUPP; @@ -2959,7 +2984,7 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, count = le16_to_cpu(cifs_acl->access_entry_count); size = sizeof(struct cifs_posix_acl); size += sizeof(struct cifs_posix_ace) * count; -/* skip past access ACEs to get to default ACEs */ + /* skip past access ACEs to get to default ACEs */ pACE = &cifs_acl->ace_array[count]; count = le16_to_cpu(cifs_acl->default_entry_count); size += sizeof(struct cifs_posix_ace) * count; @@ -2971,62 +2996,75 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, return -EINVAL; } - size = posix_acl_xattr_size(count); - if ((buflen == 0) || (local_acl == NULL)) { - /* used to query ACL EA size */ - } else if (size > buflen) { - return -ERANGE; - } else /* buffer big enough */ { - struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1); - - local_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); - for (i = 0; i < count ; i++) { - cifs_convert_ace(&ace[i], pACE); - pACE++; - } + /* Allocate number of POSIX ACLs to store in VFS format. 
*/ + kacl = posix_acl_alloc(count, GFP_NOFS); + if (!kacl) + return -ENOMEM; + + FOREACH_ACL_ENTRY(pa, kacl, pe) { + cifs_init_posix_acl(pa, pACE); + pACE++; } - return size; + + *acl = kacl; + return 0; } -static void convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, - const struct posix_acl_xattr_entry *local_ace) +/** + * cifs_init_ace - convert ACL entry from POSIX ACL to cifs format + * @cifs_ace: the cifs ACL entry to store into + * @local_ace: the POSIX ACL entry to convert + */ +static void cifs_init_ace(struct cifs_posix_ace *cifs_ace, + const struct posix_acl_entry *local_ace) { - cifs_ace->cifs_e_perm = le16_to_cpu(local_ace->e_perm); - cifs_ace->cifs_e_tag = le16_to_cpu(local_ace->e_tag); - /* BB is there a better way to handle the large uid? */ - if (local_ace->e_id == cpu_to_le32(-1)) { - /* Probably no need to le convert -1 on any arch but can not hurt */ + cifs_ace->cifs_e_perm = local_ace->e_perm; + cifs_ace->cifs_e_tag = local_ace->e_tag; + + switch (local_ace->e_tag) { + case ACL_USER: + cifs_ace->cifs_uid = + cpu_to_le64(from_kuid(&init_user_ns, local_ace->e_uid)); + break; + case ACL_GROUP: + cifs_ace->cifs_uid = + cpu_to_le64(from_kgid(&init_user_ns, local_ace->e_gid)); + break; + default: cifs_ace->cifs_uid = cpu_to_le64(-1); - } else - cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id)); -/* - cifs_dbg(FYI, "perm %d tag %d id %d\n", - ace->e_perm, ace->e_tag, ace->e_id); -*/ + } } -/* Convert ACL from local Linux POSIX xattr to CIFS POSIX ACL wire format */ -static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, - const int buflen, const int acl_type) +/** + * posix_acl_to_cifs - convert ACLs from POSIX ACL to cifs format + * @parm_data: ACLs in cifs format to conver to + * @acl: ACLs in POSIX ACL format to convert from + * @acl_type: the type of POSIX ACLs stored in @acl + * + * Return: the number cifs ACL entries after conversion + */ +static __u16 posix_acl_to_cifs(char *parm_data, const struct posix_acl *acl, 
+ const int acl_type) { __u16 rc = 0; struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data; - struct posix_acl_xattr_header *local_acl = (void *)pACL; - struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1); + const struct posix_acl_entry *pa, *pe; int count; - int i; + int i = 0; - if ((buflen == 0) || (pACL == NULL) || (cifs_acl == NULL)) + if ((acl == NULL) || (cifs_acl == NULL)) return 0; - count = posix_acl_xattr_count((size_t)buflen); - cifs_dbg(FYI, "setting acl with %d entries from buf of length %d and version of %d\n", - count, buflen, le32_to_cpu(local_acl->a_version)); - if (le32_to_cpu(local_acl->a_version) != 2) { - cifs_dbg(FYI, "unknown POSIX ACL version %d\n", - le32_to_cpu(local_acl->a_version)); - return 0; - } + count = acl->a_count; + cifs_dbg(FYI, "setting acl with %d entries\n", count); + + /* + * Note that the uapi POSIX ACL version is verified by the VFS and is + * independent of the cifs ACL version. Changing the POSIX ACL version + * is a uapi change and if it's changed we will pass down the POSIX ACL + * version in struct posix_acl from the VFS. For now there's really + * only one that all filesystems know how to deal with. 
+ */ cifs_acl->version = cpu_to_le16(1); if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); @@ -3038,8 +3076,9 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } - for (i = 0; i < count; i++) - convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]); + FOREACH_ACL_ENTRY(pa, acl, pe) { + cifs_init_ace(&cifs_acl->ace_array[i++], pa); + } if (rc == 0) { rc = (__u16)(count * sizeof(struct cifs_posix_ace)); rc += sizeof(struct cifs_posix_acl); @@ -3048,11 +3087,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return rc; } -int -CIFSSMBGetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - char *acl_inf, const int buflen, const int acl_type, - const struct nls_table *nls_codepage, int remap) +int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *searchName, struct posix_acl **acl, + const int acl_type, const struct nls_table *nls_codepage, + int remap) { /* SMB_QUERY_POSIX_ACL */ TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -3124,23 +3162,26 @@ queryAclRetry: else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 count = le16_to_cpu(pSMBr->t2.DataCount); - rc = cifs_copy_posix_acl(acl_inf, + rc = cifs_to_posix_acl(acl, (char *)&pSMBr->hdr.Protocol+data_offset, - buflen, acl_type, count); + acl_type, count); } } cifs_buf_release(pSMB); + /* + * The else branch after SendReceive() doesn't return EAGAIN so if we + * allocated @acl in cifs_to_posix_acl() we are guaranteed to return + * here and don't leak POSIX ACLs. 
+ */ if (rc == -EAGAIN) goto queryAclRetry; return rc; } -int -CIFSSMBSetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *fileName, - const char *local_acl, const int buflen, - const int acl_type, - const struct nls_table *nls_codepage, int remap) +int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *fileName, const struct posix_acl *acl, + const int acl_type, const struct nls_table *nls_codepage, + int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; @@ -3181,7 +3222,7 @@ setAclRetry: pSMB->ParameterOffset = cpu_to_le16(param_offset); /* convert to on the wire format for POSIX ACL */ - data_count = ACL_to_cifs_posix(parm_data, local_acl, buflen, acl_type); + data_count = posix_acl_to_cifs(parm_data, acl, acl_type); if (data_count == 0) { rc = -EOPNOTSUPP; @@ -3211,6 +3252,23 @@ setACLerrorExit: goto setAclRetry; return rc; } +#else +int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *searchName, struct posix_acl **acl, + const int acl_type, const struct nls_table *nls_codepage, + int remap) +{ + return -EOPNOTSUPP; +} + +int cifs_do_set_acl(const unsigned int xid, struct cifs_tcon *tcon, + const unsigned char *fileName, const struct posix_acl *acl, + const int acl_type, const struct nls_table *nls_codepage, + int remap) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_FS_POSIX_ACL */ int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ffb291579bb9..e80252a83225 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -759,7 +759,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, { struct msghdr smb_msg = {}; struct kvec iov = {.iov_base = buf, .iov_len = to_read}; - iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read); + iov_iter_kvec(&smb_msg.msg_iter, ITER_DEST, &iov, 1, to_read); return 
cifs_readv_from_socket(server, &smb_msg); } @@ -774,7 +774,7 @@ cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read) * and cifs_readv_from_socket sets msg_control and msg_controllen * so little to initialize in struct msghdr */ - iov_iter_discard(&smb_msg.msg_iter, READ, to_read); + iov_iter_discard(&smb_msg.msg_iter, ITER_DEST, to_read); return cifs_readv_from_socket(server, &smb_msg); } @@ -786,7 +786,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, struct msghdr smb_msg = {}; struct bio_vec bv = { .bv_page = page, .bv_len = to_read, .bv_offset = page_offset}; - iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read); + iov_iter_bvec(&smb_msg.msg_iter, ITER_DEST, &bv, 1, to_read); return cifs_readv_from_socket(server, &smb_msg); } @@ -1584,6 +1584,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) server->session_key.response = NULL; server->session_key.len = 0; kfree(server->hostname); + server->hostname = NULL; task = xchg(&server->tsk, NULL); if (task) @@ -3854,9 +3855,13 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: - free_xid(mnt_ctx.xid); cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + + free_xid(mnt_ctx.xid); + return rc; error: dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); @@ -3883,8 +3888,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; } + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + free_xid(mnt_ctx.xid); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + return rc; error: mount_put_conns(&mnt_ctx); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a5c73c2af3a2..8b1c37158556 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -543,8 
+543,10 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode, cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n", inode, direntry, direntry); - if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) - return -EIO; + if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) { + rc = -EIO; + goto out_free_xid; + } tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index f6ffee514c34..87b56b1ae117 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1413,7 +1413,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) struct inode *inode = d_inode(cfile->dentry); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct file_lock *flock; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); unsigned int count = 0, i; int rc = 0, xid, type; struct list_head locks_to_send, *el; @@ -1885,11 +1885,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) struct cifsFileInfo *cfile; __u32 type; - rc = -EACCES; xid = get_xid(); - if (!(fl->fl_flags & FL_FLOCK)) - return -ENOLCK; + if (!(fl->fl_flags & FL_FLOCK)) { + rc = -ENOLCK; + free_xid(xid); + return rc; + } cfile = (struct cifsFileInfo *)file->private_data; tcon = tlink_tcon(cfile->tlink); @@ -1908,8 +1910,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) * if no lock or unlock then nothing to do since we do not * know what it is */ + rc = -EOPNOTSUPP; free_xid(xid); - return -EOPNOTSUPP; + return rc; } rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, @@ -2431,12 +2434,16 @@ cifs_writev_complete(struct work_struct *work) struct cifs_writedata * cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete) { + struct cifs_writedata *writedata = NULL; struct page **pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); - if (pages) - return cifs_writedata_direct_alloc(pages, complete); + 
if (pages) { + writedata = cifs_writedata_direct_alloc(pages, complete); + if (!writedata) + kvfree(pages); + } - return NULL; + return writedata; } struct cifs_writedata * @@ -3296,6 +3303,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, cifs_uncached_writev_complete); if (!wdata) { rc = -ENOMEM; + for (i = 0; i < nr_pages; i++) + put_page(pagevec[i]); + kvfree(pagevec); add_credits_and_wake_if(server, credits, 0); break; } @@ -3522,7 +3532,7 @@ static ssize_t __cifs_writev( ctx->iter = *from; ctx->len = len; } else { - rc = setup_aio_ctx_iter(ctx, from, WRITE); + rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE); if (rc) { kref_put(&ctx->refcount, cifs_aio_ctx_release); return rc; @@ -4266,7 +4276,7 @@ static ssize_t __cifs_readv( ctx->iter = *to; ctx->len = len; } else { - rc = setup_aio_ctx_iter(ctx, to, READ); + rc = setup_aio_ctx_iter(ctx, to, ITER_DEST); if (rc) { kref_put(&ctx->refcount, cifs_aio_ctx_release); return rc; diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index a1751b956318..f6f3a6b75601 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -150,7 +150,7 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page) bvec[0].bv_page = page; bvec[0].bv_offset = 0; bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); ret = fscache_begin_read_operation(&cres, cookie); if (ret < 0) @@ -180,7 +180,7 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page, bvec[0].bv_page = page; bvec[0].bv_offset = 0; bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); ret = fscache_begin_write_operation(&cres, cookie); if (ret < 0) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7cf96e581d24..4e2ca3c6e5c0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -215,11 
+215,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) kfree(cifs_i->symlink_target); cifs_i->symlink_target = fattr->cf_symlink_target; fattr->cf_symlink_target = NULL; - - if (unlikely(!cifs_i->symlink_target)) - inode->i_link = ERR_PTR(-EOPNOTSUPP); - else - inode->i_link = cifs_i->symlink_target; } spin_unlock(&inode->i_lock); @@ -368,8 +363,10 @@ cifs_get_file_info_unix(struct file *filp) if (cfile->symlink_target) { fattr.cf_symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); - if (!fattr.cf_symlink_target) - return -ENOMEM; + if (!fattr.cf_symlink_target) { + rc = -ENOMEM; + goto cifs_gfiunix_out; + } } rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 89d5fa887364..6419ec47c2a8 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -343,7 +343,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); - if (rc != EOPNOTSUPP) + if (rc != -EOPNOTSUPP) break; } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -373,7 +373,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * pSMBFile->fid.netfid, * extAttrBits, * &ExtAttrMask); - * if (rc != EOPNOTSUPP) + * if (rc != -EOPNOTSUPP) * break; */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index da51ffd02928..3e68d8208cf5 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -400,6 +400,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) { struct smb_hdr *buf = (struct smb_hdr *)buffer; struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *pCifsInode; @@ -464,9 +465,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; + /* If server is a channel, select the primary channel */ + pserver 
= CIFS_SERVER_IS_CHAN(srv) ? srv->primary_server : srv; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &srv->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0435d1dfa9e1..9e7d9f0baa18 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* now drop the ref to the current iface */ if (old_iface && iface) { - kref_put(&old_iface->refcount, release_iface); cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); - } else if (old_iface) { kref_put(&old_iface->refcount, release_iface); + } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); @@ -496,6 +496,7 @@ out: cifs_put_tcp_session(chan->server, 0); } + free_xid(xid); return rc; } diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index a6640e6ea58b..68e08c85fbb8 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -655,6 +655,7 @@ int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { + drop_cached_dir_by_name(xid, tcon, name, cifs_sb); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE, NULL, SMB2_OP_RMDIR, NULL, NULL, NULL); @@ -698,6 +699,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, { struct cifsFileInfo *cfile; + drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb); cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); return smb2_set_path_attr(xid, tcon, from_name, to_name, diff --git a/fs/cifs/smb2misc.c 
b/fs/cifs/smb2misc.c index a38720477966..572293c18e16 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -135,6 +135,7 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, int smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) { + struct TCP_Server_Info *pserver; struct smb2_hdr *shdr = (struct smb2_hdr *)buf; struct smb2_pdu *pdu = (struct smb2_pdu *)shdr; int hdr_size = sizeof(struct smb2_hdr); @@ -143,6 +144,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) __u32 calc_len; /* calculated length */ __u64 mid; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* * Add function to do table lookup of StructureSize by command * ie Validate the wct via smb2_struct_sizes table above @@ -155,7 +159,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(iter, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(iter, &pserver->smb_ses_list, smb_ses_list) { if (iter->Suid == le64_to_cpu(thdr->SessionId)) { ses = iter; break; @@ -608,51 +612,52 @@ smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon, } static bool -smb2_is_valid_lease_break(char *buffer) +smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) { struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct TCP_Server_Info *server; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_pending_open *open; cifs_dbg(FYI, "Checking for lease break\n"); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - spin_lock(&tcon->open_file_lock); - cifs_stats_inc( - &tcon->stats.cifs_stats.num_oplock_brks); - if (smb2_tcon_has_lease(tcon, rsp)) { - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } - open = smb2_tcon_find_pending_open_lease(tcon, - rsp); - if (open) { - __u8 lease_key[SMB2_LEASE_KEY_SIZE]; - struct tcon_link *tlink; - - tlink = cifs_get_tlink(open->tlink); - memcpy(lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - smb2_queue_pending_open_break(tlink, - lease_key, - rsp->NewLeaseState); - return true; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp)) { spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + open = smb2_tcon_find_pending_open_lease(tcon, + rsp); + if (open) { + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; + struct tcon_link *tlink; + + tlink = cifs_get_tlink(open->tlink); + memcpy(lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + smb2_queue_pending_open_break(tlink, + lease_key, + rsp->NewLeaseState); + return true; + } + spin_unlock(&tcon->open_file_lock); - if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { + spin_unlock(&cifs_tcp_ses_lock); + return true; } } } @@ -671,6 +676,7 @@ bool smb2_is_valid_oplock_break(char 
*buffer, struct TCP_Server_Info *server) { struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode; @@ -684,16 +690,19 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer); + return smb2_is_valid_lease_break(buffer, server); else return false; } cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 17b25153cb68..32b3877b538a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,6 +530,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); + ses->iface_count = 0; /* * Go through iface_list and do kref_put to remove * any unused ifaces. 
ifaces in use will be removed @@ -651,9 +652,9 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, kref_put(&iface->refcount, release_iface); } else list_add_tail(&info->iface_head, &ses->iface_list); - spin_unlock(&ses->iface_lock); ses->iface_count++; + spin_unlock(&ses->iface_lock); ses->iface_last_update = jiffies; next_iface: nb_iface++; @@ -1115,6 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto sea_exit; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); @@ -1125,6 +1128,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rqst[2].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); + if (rc) + goto sea_exit; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, @@ -2301,14 +2306,18 @@ static void smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) { struct smb2_hdr *shdr = (struct smb2_hdr *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; if (shdr->Status != STATUS_NETWORK_NAME_DELETED) return; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); @@ -4263,21 +4272,23 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) static int smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; u8 *ses_enc_key; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) { - spin_lock(&ses->ses_lock); - ses_enc_key = enc ? ses->smb3encryptionkey : - ses->smb3decryptionkey; - memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); - spin_unlock(&ses->ses_lock); - spin_unlock(&cifs_tcp_ses_lock); - return 0; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) { + spin_lock(&ses->ses_lock); + ses_enc_key = enc ? ses->smb3encryptionkey : + ses->smb3decryptionkey; + memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + return 0; } } spin_unlock(&cifs_tcp_ses_lock); @@ -4712,13 +4723,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - iov_iter_bvec(&iter, WRITE, bvec, npages, data_len); + iov_iter_bvec(&iter, ITER_SOURCE, bvec, npages, data_len); } else if (buf_len >= data_offset + data_len) { /* read response payload is in buf */ WARN_ONCE(npages > 0, "read data can be either in buf or in pages"); iov.iov_base = buf + data_offset; iov.iov_len = data_len; - iov_iter_kvec(&iter, WRITE, &iov, 1, data_len); + iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, data_len); } else { /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a2384509ea84..a5695748a89b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1341,14 +1341,13 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) static void SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data) { - int i; + struct kvec *iov = sess_data->iov; - /* zero the session data before freeing, as it might contain sensitive info (keys, etc) */ - for (i = 0; i < 2; i++) - if (sess_data->iov[i].iov_base) - 
memzero_explicit(sess_data->iov[i].iov_base, sess_data->iov[i].iov_len); + /* iov[1] is already freed by caller */ + if (sess_data->buf0_type != CIFS_NO_BUFFER && iov[0].iov_base) + memzero_explicit(iov[0].iov_base, iov[0].iov_len); - free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base); + free_rsp_buf(sess_data->buf0_type, iov[0].iov_base); sess_data->buf0_type = CIFS_NO_BUFFER; } @@ -1531,7 +1530,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) &blob_length, ses, server, sess_data->nls_cp); if (rc) - goto out_err; + goto out; if (use_spnego) { /* BB eventually need to add this */ @@ -1578,7 +1577,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) } out: - memzero_explicit(ntlmssp_blob, blob_length); + kfree_sensitive(ntlmssp_blob); SMB2_sess_free_buffer(sess_data); if (!rc) { sess_data->result = 0; @@ -1662,7 +1661,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) } #endif out: - memzero_explicit(ntlmssp_blob, blob_length); + kfree_sensitive(ntlmssp_blob); SMB2_sess_free_buffer(sess_data); kfree_sensitive(ses->ntlmssp); ses->ntlmssp = NULL; diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8e3f26e6f6b9..381babc1212c 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -77,18 +77,19 @@ static int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; + struct TCP_Server_Info *pserver; struct cifs_ses *ses = NULL; - struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) - goto found; - } + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) + goto found; } cifs_server_dbg(VFS, "%s: Could not find session 0x%llx\n", __func__, ses_id); @@ -136,9 +137,13 @@ out: static struct cifs_ses * smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; ++ses->ses_count; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 575fa8f58342..3851d0aaa288 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -347,7 +347,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, .iov_base = &rfc1002_marker, .iov_len = 4 }; - iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4); + iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) goto unmask; @@ -368,7 +368,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, size += iov[i].iov_len; } - iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size); + iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) @@ -384,7 +384,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, rqst_page_get_length(&rqst[j], i, &bvec.bv_len, &bvec.bv_offset); - iov_iter_bvec(&smb_msg.msg_iter, WRITE, + iov_iter_bvec(&smb_msg.msg_iter, ITER_SOURCE, &bvec, 1, bvec.bv_len); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 998fa51f9b68..5f2fb2fd2e37 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -200,32 +200,6 @@ static int cifs_xattr_set(const struct 
xattr_handler *handler, } break; } - -#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - case XATTR_ACL_ACCESS: -#ifdef CONFIG_CIFS_POSIX - if (!value) - goto out; - if (sb->s_flags & SB_POSIXACL) - rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, - value, (const int)size, - ACL_TYPE_ACCESS, cifs_sb->local_nls, - cifs_remap(cifs_sb)); -#endif /* CONFIG_CIFS_POSIX */ - break; - - case XATTR_ACL_DEFAULT: -#ifdef CONFIG_CIFS_POSIX - if (!value) - goto out; - if (sb->s_flags & SB_POSIXACL) - rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, - value, (const int)size, - ACL_TYPE_DEFAULT, cifs_sb->local_nls, - cifs_remap(cifs_sb)); -#endif /* CONFIG_CIFS_POSIX */ - break; -#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ } out: @@ -366,27 +340,6 @@ static int cifs_xattr_get(const struct xattr_handler *handler, } break; } -#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - case XATTR_ACL_ACCESS: -#ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & SB_POSIXACL) - rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, - value, size, ACL_TYPE_ACCESS, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); -#endif /* CONFIG_CIFS_POSIX */ - break; - - case XATTR_ACL_DEFAULT: -#ifdef CONFIG_CIFS_POSIX - if (sb->s_flags & SB_POSIXACL) - rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, - value, size, ACL_TYPE_DEFAULT, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); -#endif /* CONFIG_CIFS_POSIX */ - break; -#endif /* ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ } /* We could add an additional check for streams ie @@ -525,21 +478,6 @@ static const struct xattr_handler smb3_ntsd_full_xattr_handler = { .set = cifs_xattr_set, }; - -static const struct xattr_handler cifs_posix_acl_access_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = XATTR_ACL_ACCESS, - .get = cifs_xattr_get, - .set = cifs_xattr_set, -}; - -static const struct xattr_handler cifs_posix_acl_default_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = XATTR_ACL_DEFAULT, - .get = cifs_xattr_get, - .set = cifs_xattr_set, -}; - const struct 
xattr_handler *cifs_xattr_handlers[] = { &cifs_user_xattr_handler, &cifs_os2_xattr_handler, @@ -549,7 +487,9 @@ const struct xattr_handler *cifs_xattr_handlers[] = { &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_cifs_ntsd_full_xattr_handler, &smb3_ntsd_full_xattr_handler, /* alias for above since avoiding "cifs" */ - &cifs_posix_acl_access_xattr_handler, - &cifs_posix_acl_default_xattr_handler, +#ifdef CONFIG_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif NULL }; diff --git a/fs/coredump.c b/fs/coredump.c index 7bad7785e8e6..9a745d08c57f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -325,6 +325,10 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, err = cn_printf(cn, "%lu", rlimit(RLIMIT_CORE)); break; + /* CPU the task ran on */ + case 'C': + err = cn_printf(cn, "%d", cprm->cpu); + break; default: break; } @@ -525,7 +529,6 @@ void do_coredump(const kernel_siginfo_t *siginfo) static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .siginfo = siginfo, - .regs = signal_pt_regs(), .limit = rlimit(RLIMIT_CORE), /* * We must use the same mm->flags while dumping core to avoid @@ -534,6 +537,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) */ .mm_flags = mm->flags, .vma_meta = NULL, + .cpu = raw_smp_processor_id(), }; audit_core_dumps(siginfo->si_signo); @@ -853,7 +857,7 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) if (dump_interrupted()) return 0; pos = file->f_pos; - iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE); + iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); n = __kernel_write_iter(cprm->file, &iter, &pos); if (n != PAGE_SIZE) return 0; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 1cca09aa43f8..2a24b1f0ae68 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -205,14 +205,19 @@ static int allocate_filesystem_keyring(struct super_block *sb) } /* - * This is 
called at unmount time to release all encryption keys that have been - * added to the filesystem, along with the keyring that contains them. + * Release all encryption keys that have been added to the filesystem, along + * with the keyring that contains them. * - * Note that besides clearing and freeing memory, this might need to evict keys - * from the keyslots of an inline crypto engine. Therefore, this must be called - * while the filesystem's underlying block device(s) are still available. + * This is called at unmount time. The filesystem's underlying block device(s) + * are still available at this time; this is important because after user file + * accesses have been allowed, this function may need to evict keys from the + * keyslots of an inline crypto engine, which requires the block device(s). + * + * This is also called when the super_block is being freed. This is needed to + * avoid a memory leak if mounting fails after the "test_dummy_encryption" + * option was processed, as in that case the unmount-time call isn't made. 
*/ -void fscrypt_sb_delete(struct super_block *sb) +void fscrypt_destroy_keyring(struct super_block *sb) { struct fscrypt_keyring *keyring = sb->s_master_keys; size_t i; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ddb3fc258df9..b54f470e0d03 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -378,8 +378,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, } EXPORT_SYMBOL_GPL(debugfs_attr_read); -ssize_t debugfs_attr_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) +static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; @@ -387,12 +387,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; - ret = simple_attr_write(file, buf, len, ppos); + if (is_signed) + ret = simple_attr_write_signed(file, buf, len, ppos); + else + ret = simple_attr_write(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } + +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return debugfs_attr_write_xsigned(file, buf, len, ppos, false); +} EXPORT_SYMBOL_GPL(debugfs_attr_write); +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return debugfs_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); + static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, @@ -738,11 +754,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val) *val = atomic_read((atomic_t *)data); return 0; } -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get, debugfs_atomic_t_set, "%lld\n"); 
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); /** diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c214fe0981bd..f3cd00fac9c3 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -18,6 +18,8 @@ #include <linux/fs_stack.h> #include <linux/slab.h> #include <linux/xattr.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> #include <linux/fileattr.h> #include <asm/unaligned.h> #include "ecryptfs_kernel.h" @@ -1120,6 +1122,28 @@ static int ecryptfs_fileattr_set(struct user_namespace *mnt_userns, return rc; } +static struct posix_acl *ecryptfs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) +{ + return vfs_get_acl(mnt_userns, ecryptfs_dentry_to_lower(dentry), + posix_acl_xattr_name(type)); +} + +static int ecryptfs_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, struct posix_acl *acl, + int type) +{ + int rc; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_inode = d_inode(lower_dentry); + + rc = vfs_set_acl(&init_user_ns, lower_dentry, + posix_acl_xattr_name(type), acl); + if (!rc) + fsstack_copy_attr_all(d_inode(dentry), lower_inode); + return rc; +} + const struct inode_operations ecryptfs_symlink_iops = { .get_link = ecryptfs_get_link, .permission = ecryptfs_permission, @@ -1143,6 +1167,8 @@ const struct inode_operations ecryptfs_dir_iops = { .listxattr = ecryptfs_listxattr, .fileattr_get = ecryptfs_fileattr_get, .fileattr_set = ecryptfs_fileattr_set, + .get_acl = ecryptfs_get_acl, + .set_acl = ecryptfs_set_acl, }; const struct inode_operations ecryptfs_main_iops = { @@ -1152,6 +1178,8 @@ const struct inode_operations ecryptfs_main_iops = { .listxattr = 
ecryptfs_listxattr, .fileattr_get = ecryptfs_fileattr_get, .fileattr_set = ecryptfs_fileattr_set, + .get_acl = ecryptfs_get_acl, + .set_acl = ecryptfs_set_acl, }; static int ecryptfs_xattr_get(const struct xattr_handler *handler, @@ -1182,6 +1210,10 @@ static const struct xattr_handler ecryptfs_xattr_handler = { }; const struct xattr_handler *ecryptfs_xattr_handlers[] = { +#ifdef CONFIG_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif &ecryptfs_xattr_handler, NULL }; diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index a0ef63cfcecb..9e4f47808bd5 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -651,22 +651,6 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, if (err) return err; - /* - * Ensure that the available space hasn't shrunk below the safe level - */ - status = check_var_size(attributes, *size + ucs2_strsize(name, 1024)); - if (status != EFI_SUCCESS) { - if (status != EFI_UNSUPPORTED) { - err = efi_status_to_err(status); - goto out; - } - - if (*size > 65536) { - err = -ENOSPC; - goto out; - } - } - status = efivar_set_variable_locked(name, vendor, attributes, *size, data, false); if (status != EFI_SUCCESS) { diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 998cd26a1b3b..4c837be3b6e3 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -75,11 +75,15 @@ static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq) rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos = - (folio_index(folio) - start_page) * PAGE_SIZE; - unsigned int pgend = pgpos + folio_size(folio); + unsigned int pgpos, pgend; bool pg_failed = false; + if (xas_retry(&xas, folio)) + continue; + + pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; + pgend = pgpos + folio_size(folio); + for (;;) { if (!subreq) { pg_failed = true; @@ -190,7 +194,7 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, 
atomic_inc(&rreq->nr_outstanding); - iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, + iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, start + done, subreq->len); ret = fscache_read(cres, subreq->start, &iter, @@ -286,23 +290,26 @@ static int erofs_fscache_data_read(struct address_space *mapping, if (IS_ERR(src)) return PTR_ERR(src); - iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE); - if (copy_to_iter(src + offset, size, &iter) != size) + iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE); + if (copy_to_iter(src + offset, size, &iter) != size) { + erofs_put_metabuf(&buf); return -EFAULT; + } iov_iter_zero(PAGE_SIZE - size, &iter); erofs_put_metabuf(&buf); return PAGE_SIZE; } - count = min_t(size_t, map.m_llen - (pos - map.m_la), len); - DBG_BUGON(!count || count % PAGE_SIZE); - if (!(map.m_flags & EROFS_MAP_MAPPED)) { - iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count); + count = len; + iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count); iov_iter_zero(count, &iter); return count; } + count = min_t(size_t, map.m_llen - (pos - map.m_la), len); + DBG_BUGON(!count || count % PAGE_SIZE); + mdev = (struct erofs_map_dev) { .m_deviceid = map.m_deviceid, .m_pa = map.m_pa, @@ -403,13 +410,13 @@ static void erofs_fscache_domain_put(struct erofs_domain *domain) static int erofs_fscache_register_volume(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - char *domain_id = sbi->opt.domain_id; + char *domain_id = sbi->domain_id; struct fscache_volume *volume; char *name; int ret = 0; name = kasprintf(GFP_KERNEL, "erofs,%s", - domain_id ? domain_id : sbi->opt.fsid); + domain_id ? 
domain_id : sbi->fsid); if (!name) return -ENOMEM; @@ -435,7 +442,7 @@ static int erofs_fscache_init_domain(struct super_block *sb) if (!domain) return -ENOMEM; - domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL); + domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL); if (!domain->domain_id) { kfree(domain); return -ENOMEM; @@ -472,7 +479,7 @@ static int erofs_fscache_register_domain(struct super_block *sb) mutex_lock(&erofs_domain_list_lock); list_for_each_entry(domain, &erofs_domain_list, list) { - if (!strcmp(domain->domain_id, sbi->opt.domain_id)) { + if (!strcmp(domain->domain_id, sbi->domain_id)) { sbi->domain = domain; sbi->volume = domain->volume; refcount_inc(&domain->ref); @@ -590,14 +597,17 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, struct super_block *psb = erofs_pseudo_mnt->mnt_sb; mutex_lock(&erofs_domain_cookies_lock); + spin_lock(&psb->s_inode_list_lock); list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { ctx = inode->i_private; if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) continue; igrab(inode); + spin_unlock(&psb->s_inode_list_lock); mutex_unlock(&erofs_domain_cookies_lock); return ctx; } + spin_unlock(&psb->s_inode_list_lock); ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); mutex_unlock(&erofs_domain_cookies_lock); return ctx; @@ -606,7 +616,7 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, bool need_inode) { - if (EROFS_SB(sb)->opt.domain_id) + if (EROFS_SB(sb)->domain_id) return erofs_domain_register_cookie(sb, name, need_inode); return erofs_fscache_acquire_cookie(sb, name, need_inode); } @@ -638,7 +648,7 @@ int erofs_fscache_register_fs(struct super_block *sb) struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_fscache *fscache; - if (sbi->opt.domain_id) + if (sbi->domain_id) ret = erofs_fscache_register_domain(sb); else ret = 
erofs_fscache_register_volume(sb); @@ -646,7 +656,7 @@ int erofs_fscache_register_fs(struct super_block *sb) return ret; /* acquired domain/volume will be relinquished in kill_sb() on error */ - fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true); + fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true); if (IS_ERR(fscache)) return PTR_ERR(fscache); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index ad2a82f2eb4c..2d571343deec 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -371,7 +371,7 @@ int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, const struct inode_operations erofs_generic_iops = { .getattr = erofs_getattr, .listxattr = erofs_listxattr, - .get_acl = erofs_get_acl, + .get_inode_acl = erofs_get_acl, .fiemap = erofs_fiemap, }; @@ -379,12 +379,12 @@ const struct inode_operations erofs_symlink_iops = { .get_link = page_get_link, .getattr = erofs_getattr, .listxattr = erofs_listxattr, - .get_acl = erofs_get_acl, + .get_inode_acl = erofs_get_acl, }; const struct inode_operations erofs_fast_symlink_iops = { .get_link = simple_get_link, .getattr = erofs_getattr, .listxattr = erofs_listxattr, - .get_acl = erofs_get_acl, + .get_inode_acl = erofs_get_acl, }; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 1701df48c446..05dc68627722 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -75,8 +75,6 @@ struct erofs_mount_opts { unsigned int max_sync_decompress_pages; #endif unsigned int mount_opt; - char *fsid; - char *domain_id; }; struct erofs_dev_context { @@ -89,6 +87,8 @@ struct erofs_dev_context { struct erofs_fs_context { struct erofs_mount_opts opt; struct erofs_dev_context *devs; + char *fsid; + char *domain_id; }; /* all filesystem-wide lz4 configurations */ @@ -170,6 +170,8 @@ struct erofs_sb_info { struct fscache_volume *volume; struct erofs_fscache *s_fscache; struct erofs_domain *domain; + char *fsid; + char *domain_id; }; #define EROFS_SB(sb) ((struct erofs_sb_info 
*)(sb)->s_fs_info) diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 0dc34721080c..b64a108fac92 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -228,6 +228,6 @@ const struct inode_operations erofs_dir_iops = { .lookup = erofs_lookup, .getattr = erofs_getattr, .listxattr = erofs_listxattr, - .get_acl = erofs_get_acl, + .get_inode_acl = erofs_get_acl, .fiemap = erofs_fiemap, }; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 2cf96ce1c32e..1c7dcca702b3 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -579,9 +579,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; case Opt_fsid: #ifdef CONFIG_EROFS_FS_ONDEMAND - kfree(ctx->opt.fsid); - ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL); - if (!ctx->opt.fsid) + kfree(ctx->fsid); + ctx->fsid = kstrdup(param->string, GFP_KERNEL); + if (!ctx->fsid) return -ENOMEM; #else errorfc(fc, "fsid option not supported"); @@ -589,9 +589,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; case Opt_domain_id: #ifdef CONFIG_EROFS_FS_ONDEMAND - kfree(ctx->opt.domain_id); - ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); - if (!ctx->opt.domain_id) + kfree(ctx->domain_id); + ctx->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!ctx->domain_id) return -ENOMEM; #else errorfc(fc, "domain_id option not supported"); @@ -728,10 +728,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_fs_info = sbi; sbi->opt = ctx->opt; - ctx->opt.fsid = NULL; - ctx->opt.domain_id = NULL; sbi->devs = ctx->devs; ctx->devs = NULL; + sbi->fsid = ctx->fsid; + ctx->fsid = NULL; + sbi->domain_id = ctx->domain_id; + ctx->domain_id = NULL; if (erofs_is_fscache_mode(sb)) { sb->s_blocksize = EROFS_BLKSIZ; @@ -820,7 +822,7 @@ static int erofs_fc_get_tree(struct fs_context *fc) { struct erofs_fs_context *ctx = fc->fs_private; - if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) return get_tree_nodev(fc, 
erofs_fc_fill_super); return get_tree_bdev(fc, erofs_fc_fill_super); @@ -834,6 +836,9 @@ static int erofs_fc_reconfigure(struct fs_context *fc) DBG_BUGON(!sb_rdonly(sb)); + if (ctx->fsid || ctx->domain_id) + erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); + if (test_opt(&ctx->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else @@ -873,8 +878,8 @@ static void erofs_fc_free(struct fs_context *fc) struct erofs_fs_context *ctx = fc->fs_private; erofs_free_dev_context(ctx->devs); - kfree(ctx->opt.fsid); - kfree(ctx->opt.domain_id); + kfree(ctx->fsid); + kfree(ctx->domain_id); kfree(ctx); } @@ -944,8 +949,8 @@ static void erofs_kill_sb(struct super_block *sb) erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); erofs_fscache_unregister_fs(sb); - kfree(sbi->opt.fsid); - kfree(sbi->opt.domain_id); + kfree(sbi->fsid); + kfree(sbi->domain_id); kfree(sbi); sb->s_fs_info = NULL; } @@ -1098,10 +1103,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(opt, DAX_NEVER)) seq_puts(seq, ",dax=never"); #ifdef CONFIG_EROFS_FS_ONDEMAND - if (opt->fsid) - seq_printf(seq, ",fsid=%s", opt->fsid); - if (opt->domain_id) - seq_printf(seq, ",domain_id=%s", opt->domain_id); + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); + if (sbi->domain_id) + seq_printf(seq, ",domain_id=%s", sbi->domain_id); #endif return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index 783bb7b21b51..fd476961f742 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -210,14 +210,14 @@ int erofs_register_sysfs(struct super_block *sb) int err; if (erofs_is_fscache_mode(sb)) { - if (sbi->opt.domain_id) { - str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id, - sbi->opt.fsid); + if (sbi->domain_id) { + str = kasprintf(GFP_KERNEL, "%s,%s", sbi->domain_id, + sbi->fsid); if (!str) return -ENOMEM; name = str; } else { - name = sbi->opt.fsid; + name = sbi->fsid; } } else { name = sb->s_id; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c 
index 559380a535af..b792d424d774 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -660,6 +660,9 @@ static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, u8 *src, *dst; unsigned int i, cnt; + if (!packed_inode) + return -EFSCORRUPTED; + pos += EROFS_I(inode)->z_fragmentoff; for (i = 0; i < len; i += cnt) { cnt = min_t(unsigned int, len - i, @@ -813,15 +816,14 @@ retry: ++spiltted; if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; - - if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && - !(map->m_flags & EROFS_MAP_PARTIAL_REF) && - fe->pcl->length == map->m_llen) - fe->pcl->partial = false; if (fe->pcl->length < offset + end - map->m_la) { fe->pcl->length = offset + end - map->m_la; fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK; } + if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && + fe->pcl->length == map->m_llen) + fe->pcl->partial = false; next_part: /* shorten the remaining extent to update progress */ map->m_llen = offset + cur - map->m_la; @@ -888,15 +890,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { unsigned int pgnr; - struct page *oldpage; pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); - oldpage = be->decompressed_pages[pgnr]; - be->decompressed_pages[pgnr] = bvec->page; - - if (!oldpage) + if (!be->decompressed_pages[pgnr]) { + be->decompressed_pages[pgnr] = bvec->page; return; + } } /* (cold path) one pcluster is requested multiple times */ @@ -1415,8 +1415,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; - /* initialize to 1 to make skip psi_memstall_leave unless needed */ - unsigned long pflags = 1; + unsigned long pflags; + int memstall = 0; bi_private = jobqueueset_init(sb, q, fgq, force_fg); qtail[JQ_BYPASS] = 
&q[JQ_BYPASS]->head; @@ -1466,14 +1466,18 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, if (bio && (cur != last_index + 1 || last_bdev != mdev.m_bdev)) { submit_bio_retry: - if (!pflags) - psi_memstall_leave(&pflags); submit_bio(bio); + if (memstall) { + psi_memstall_leave(&pflags); + memstall = 0; + } bio = NULL; } - if (unlikely(PageWorkingset(page))) + if (unlikely(PageWorkingset(page)) && !memstall) { psi_memstall_enter(&pflags); + memstall = 1; + } if (!bio) { bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, @@ -1503,9 +1507,9 @@ submit_bio_retry: } while (owned_head != Z_EROFS_PCLUSTER_TAIL); if (bio) { - if (!pflags) - psi_memstall_leave(&pflags); submit_bio(bio); + if (memstall) + psi_memstall_leave(&pflags); } /* diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index e7f04c4fbb81..d98c95212985 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) } /* - * bit 31: I/O error occurred on this page - * bit 0 - 30: remaining parts to complete this page + * bit 30: I/O error occurred on this page + * bit 0 - 29: remaining parts to complete this page */ -#define Z_EROFS_PAGE_EIO (1 << 31) +#define Z_EROFS_PAGE_EIO (1 << 30) static inline void z_erofs_onlinepage_init(struct page *page) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 44c27ef39c43..0bb66927e3d0 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -57,8 +57,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); - kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), - EROFS_KMAP_ATOMIC); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); if (IS_ERR(kaddr)) { err = PTR_ERR(kaddr); goto out_unlock; @@ -73,7 +72,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ 
(1ULL << 63); vi->z_tailextent_headlcn = 0; - goto unmap_done; + goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; @@ -85,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; - goto unmap_done; + goto out_put_metabuf; } vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); @@ -95,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ @@ -103,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } -unmap_done: - erofs_put_metabuf(&buf); - if (err) - goto out_unlock; if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { struct erofs_map_blocks map = { @@ -127,7 +122,7 @@ unmap_done: err = -EFSCORRUPTED; } if (err < 0) - goto out_unlock; + goto out_put_metabuf; } if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && @@ -141,11 +136,14 @@ unmap_done: EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (err < 0) - goto out_unlock; + goto out_put_metabuf; } +done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); +out_put_metabuf: + erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; diff --git a/fs/exec.c b/fs/exec.c index 349a5da91efe..089a743f636b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -64,6 +64,7 @@ #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include 
<linux/coredump.h> +#include <linux/time_namespace.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -171,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) exit: fput(file); out: - return error; + return error; } #endif /* #ifdef CONFIG_USELIB */ @@ -199,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; - unsigned int gup_flags = FOLL_FORCE; + unsigned int gup_flags = 0; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -842,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm, * will align it up. */ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; + + stack_expand = min(rlim_stack, stack_size + stack_expand); + #ifdef CONFIG_STACK_GROWSUP - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_start + rlim_stack; - else - stack_base = vma->vm_end + stack_expand; + stack_base = vma->vm_start + stack_expand; #else - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_end - rlim_stack; - else - stack_base = vma->vm_start - stack_expand; + stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; ret = expand_stack(vma, stack_base); @@ -1012,7 +1010,6 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; - lru_gen_add_mm(mm); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for @@ -1025,6 +1022,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); + lru_gen_add_mm(mm); task_unlock(tsk); lru_gen_use_mm(mm); if (old_mm) { @@ -1197,11 +1195,11 @@ static int unshare_sighand(struct task_struct *me) return -ENOMEM; refcount_set(&newsighand->count, 1); - memcpy(newsighand->action, oldsighand->action, - sizeof(newsighand->action)); write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); + 
memcpy(newsighand->action, oldsighand->action, + sizeof(newsighand->action)); rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); @@ -1297,6 +1295,10 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; + retval = exec_task_namespaces(); + if (retval) + goto out_unlock; + #ifdef CONFIG_POSIX_TIMERS spin_lock_irq(&me->sighand->siglock); posix_cpu_timers_exit(me); @@ -1568,6 +1570,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm) if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; + /* + * If another task is sharing our fs, we cannot safely + * suid exec because the differently privileged task + * will be able to manipulate the current directory, etc. + * It would be nice to force an unshare instead... + */ t = p; n_fs = 1; spin_lock(&p->fs->lock); @@ -1748,6 +1756,7 @@ static int search_binary_handler(struct linux_binprm *bprm) return retval; } +/* binfmt handlers will call back into begin_new_exec() on success. */ static int exec_binprm(struct linux_binprm *bprm) { pid_t old_pid, old_vpid; @@ -1806,6 +1815,11 @@ static int bprm_execve(struct linux_binprm *bprm, if (retval) return retval; + /* + * Check for unsafe execution states before exec_binprm(), which + * will call back into begin_new_exec(), into bprm_creds_from_file(), + * where setuid-ness is evaluated. 
+ */ check_unsafe_exec(bprm); current->in_execve = 1; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index bf298967c5b8..440d5f1e9d47 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -219,11 +219,12 @@ __ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) * inode->i_mutex: down */ int -ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int error; int update_mode = 0; + struct inode *inode = d_inode(dentry); umode_t mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 925ab6287d35..3841becb94ff 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -56,7 +56,7 @@ static inline int ext2_acl_count(size_t size) /* acl.c */ extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu); -extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +extern int ext2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index eb97aa3d700e..6b4bebe982ca 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -200,7 +200,7 @@ const struct inode_operations ext2_file_inode_operations = { .listxattr = ext2_listxattr, .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, .set_acl = ext2_set_acl, .fiemap = ext2_fiemap, .fileattr_get = ext2_fileattr_get, diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index f4944c4dee60..78b8686d9a4a 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -277,7 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) int best_ndir = inodes_per_group; int best_group = -1; - parent_group = prandom_u32_max(ngroups); + parent_group = get_random_u32_below(ngroups); for (i = 0; i < ngroups; i++) { group = 
(parent_group + i) % ngroups; desc = ext2_get_group_desc (sb, group, NULL); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 918ab2f9e4c0..e97e77be64f3 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1652,7 +1652,7 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } setattr_copy(&init_user_ns, inode, iattr); if (iattr->ia_valid & ATTR_MODE) - error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode); + error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); mark_inode_dirty(inode); return error; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 9125eab85146..c056957221a2 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -427,7 +427,7 @@ const struct inode_operations ext2_dir_inode_operations = { .listxattr = ext2_listxattr, .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, .set_acl = ext2_set_acl, .tmpfile = ext2_tmpfile, .fileattr_get = ext2_fileattr_get, @@ -438,6 +438,6 @@ const struct inode_operations ext2_special_inode_operations = { .listxattr = ext2_listxattr, .getattr = ext2_getattr, .setattr = ext2_setattr, - .get_acl = ext2_get_acl, + .get_inode_acl = ext2_get_acl, .set_acl = ext2_set_acl, }; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 57e82e25f8e2..a9f89539aeee 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -225,12 +225,13 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { handle_t *handle; int error, credits, retries = 0; size_t acl_size = acl ? 
ext4_acl_size(acl->a_count) : 0; + struct inode *inode = d_inode(dentry); umode_t mode = inode->i_mode; int update_mode = 0; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 3219669732bf..09c4a8a3b716 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -56,7 +56,7 @@ static inline int ext4_acl_count(size_t size) /* acl.c */ struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu); -int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ext4_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1956288307f..6c399a8b22b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. */ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5227,14 +5228,21 @@ again: ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ef05bfa87798..0f6d0a80467d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1521,6 +1521,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct 
ext4_fc_tl *tl, struct ext4_iloc iloc; int inode_len, ino, ret, tag = tl->fc_tag; struct ext4_extent_header *eh; + size_t off_gen = offsetof(struct ext4_inode, i_generation); memcpy(&fc_inode, val, sizeof(fc_inode)); @@ -1548,8 +1549,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, raw_inode = ext4_raw_inode(&iloc); memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); - memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, - inode_len - offsetof(struct ext4_inode, i_generation)); + memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, + inode_len - off_gen); if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); if (eh->eh_magic != EXT4_EXT_MAGIC) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a7a597c727e6..7ac0a81bd371 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -955,7 +955,7 @@ const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_file_getattr, .listxattr = ext4_listxattr, - .get_acl = ext4_get_acl, + .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, .fileattr_get = ext4_fileattr_get, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e9bc46684106..9aa8b18bdac1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -465,7 +465,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo); parent_group = hinfo.hash % ngroups; } else - parent_group = prandom_u32_max(ngroups); + parent_group = get_random_u32_below(ngroups); for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; get_orlov_stats(sb, g, flex_size, &stats); @@ -870,7 +870,7 @@ static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode, struct super_block *sb = dir->i_sb; int nblocks = 0; #ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); + struct 
posix_acl *p = get_inode_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2b5ef1b64249..a8e12ce6673d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5550,7 +5550,7 @@ out_mmap_sem: ext4_orphan_del(NULL, inode); if (!error && (ia_valid & ATTR_MODE)) - rc = posix_acl_chmod(mnt_userns, inode, inode->i_mode); + rc = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); err_out: if (error) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ded535535b27..95dfea28bf4e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct super_block *sb, if (ext4_has_metadata_csum(sb) && es->s_checksum != ext4_superblock_csum(sb, es)) { ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " - "superblock %llu\n", sb_block); + "superblock %llu", sb_block); unlock_buffer(bh); - err = -EFSBADCRC; goto out_bh; } func(es, arg); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 0a220ec9862d..a19a9661646e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -424,7 +424,8 @@ int ext4_ext_migrate(struct inode *inode) * already is extent-based, error out. 
*/ if (!ext4_has_feature_extents(inode->i_sb) || - (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + ext4_has_inline_data(inode)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 588cb09c5291..4681fff6665f 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -262,13 +262,7 @@ void ext4_stop_mmpd(struct ext4_sb_info *sbi) */ static unsigned int mmp_new_seq(void) { - u32 new_seq; - - do { - new_seq = get_random_u32(); - } while (new_seq > EXT4_MMP_SEQ_MAX); - - return new_seq; + return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1); } /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d5daaf41e1fc..27a863e1120e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2259,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, memset(de, 0, len); /* wipe old data */ de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { + if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, + (data2 + (blocksize - csum_size) - + (char *) de))) { + brelse(bh2); + brelse(bh); + return -EFSCORRUPTED; + } de = de2; + } de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de, blocksize); @@ -4186,7 +4194,7 @@ const struct inode_operations ext4_dir_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, - .get_acl = ext4_get_acl, + .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, .fileattr_get = ext4_fileattr_get, @@ -4197,6 +4205,6 @@ const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, - .get_acl = ext4_get_acl, + .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, }; diff --git a/fs/ext4/resize.c 
b/fs/ext4/resize.c index 6dfe9ccae0c5..46b87ffeb304 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1158,6 +1158,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; + struct ext4_super_block *es; /* Out of journal space, and can't get more - abort - so sad */ err = ext4_resize_ensure_credits_batch(handle, 1); @@ -1186,6 +1187,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); + es = (struct ext4_super_block *) bh->b_data; + es->s_block_group_nr = cpu_to_le16(group); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 989365b878a6..63ef74eb8091 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1741,10 +1741,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) -static const char deprecated_msg[] = - "Mount option \"%s\" will be removed by %s\n" - "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; - #define MOPT_SET 0x0001 #define MOPT_CLEAR 0x0002 #define MOPT_NOSUPPORT 0x0004 @@ -3782,7 +3778,7 @@ cont_thread: } if (!progress) { elr->lr_next_sched = jiffies + - prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ); + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ); } if (time_before(elr->lr_next_sched, next_wakeup)) next_wakeup = elr->lr_next_sched; @@ -3929,8 +3925,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, * spread the inode table initialization requests * better. 
*/ - elr->lr_next_sched = jiffies + prandom_u32_max( - EXT4_DEF_LI_MAX_START_DELAY * HZ); + elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ); return elr; } @@ -4885,7 +4880,7 @@ out: flush_work(&sbi->s_error_work); jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - return err; + return -EINVAL; } static int ext4_journal_data_mode_check(struct super_block *sb) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 5bbc44a5216e..c1c74aa658ae 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -276,9 +276,11 @@ static int __f2fs_set_acl(struct user_namespace *mnt_userns, return error; } -int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int f2fs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { + struct inode *inode = d_inode(dentry); + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index a26e33cab4ff..ea2bbb3f264b 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -34,7 +34,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); -extern int f2fs_set_acl(struct user_namespace *, struct inode *, +extern int f2fs_set_acl(struct user_namespace *, struct dentry *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 82cda1258227..83df6f6173d3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1025,7 +1025,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, __setattr_copy(mnt_userns, inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode)); + err = posix_acl_chmod(mnt_userns, dentry, f2fs_get_inode_mode(inode)); if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) @@ -1046,7 +1046,7 @@ int f2fs_setattr(struct user_namespace 
*mnt_userns, struct dentry *dentry, const struct inode_operations f2fs_file_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, + .get_inode_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, .fiemap = f2fs_fiemap, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4546e01b2ee0..536d332d9e2e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -282,7 +282,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, /* let's select beginning hot/small space first in no_heap mode*/ if (f2fs_need_rand_seg(sbi)) - p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a389772fd212..c227113b0f26 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1379,7 +1379,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, + .get_inode_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, .fiemap = f2fs_fiemap, @@ -1397,7 +1397,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, + .get_inode_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index acf3d3fa4363..b304692c0cf5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2534,7 +2534,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) sanity_check_seg_type(sbi, seg_type); if (f2fs_need_rand_seg(sbi)) - return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec); + return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); /* if 
segs_per_sec is large than 1, we need to keep original policy. */ if (__is_large_section(sbi)) @@ -2588,7 +2588,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_chunk); } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -2625,9 +2625,9 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, /* To allocate block chunks in different sizes, use random number */ if (--seg->fragment_remained_chunk <= 0) { seg->fragment_remained_chunk = - prandom_u32_max(sbi->max_fragment_chunk) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_chunk); seg->next_blkoff += - prandom_u32_max(sbi->max_fragment_hole) + 1; + get_random_u32_inclusive(1, sbi->max_fragment_hole); } } } diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index af191371c352..3626eb585a98 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -17,7 +17,7 @@ struct fat_fid { #define FAT_FID_SIZE_WITHOUT_PARENT 3 #define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32)) -/** +/* * Look up a directory inode given its starting cluster. */ static struct inode *fat_dget(struct super_block *sb, int i_logstart) @@ -135,7 +135,7 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, return type; } -/** +/* * Map a NFS file handle to a corresponding dentry. * The dentry may or may not be connected to the filesystem root. 
*/ diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..9958d4020771 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If the inode is now fully clean, then it can be safely removed from - * its writeback list (if any). Otherwise the flusher threads are - * responsible for the writeback lists. + * If the inode is freeing, its i_io_list shoudn't be updated + * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); - else if (!(inode->i_state & I_SYNC_QUEUED)) { - if ((inode->i_state & I_DIRTY)) - redirty_tail_locked(inode, wb); - else if (inode->i_state & I_DIRTY_TIME) { - inode->dirtied_when = jiffies; - inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + if (!(inode->i_state & I_FREEING)) { + /* + * If the inode is now fully clean, then it can be safely + * removed from its writeback list (if any). Otherwise the + * flusher threads are responsible for the writeback lists. 
+ */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED)) { + if ((inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; + inode_io_list_move_locked(inode, + wb, + &wb->b_dirty_time); + } } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 451d8a077e12..bce2492186d0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -605,6 +605,14 @@ again: set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); queue = true; } + /* + * We could race with cookie_lru which may set LRU_DISCARD bit + * but has yet to run the cookie state machine. If this happens + * and another thread tries to use the cookie, clear LRU_DISCARD + * so we don't end up withdrawing the cookie while in use. + */ + if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) + fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear); break; case FSCACHE_COOKIE_STATE_FAILED: diff --git a/fs/fscache/io.c b/fs/fscache/io.c index 3af3b08a9bb3..0d2b8dec8f82 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -286,7 +286,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, * taken into account. 
*/ - iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len); + iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len); fscache_write(cres, start, &iter, fscache_wreq_done, wreq); return; diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a058e0136bfe..ab8ceddf9efa 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, struct fscache_volume *volume; struct fscache_cache *cache; size_t klen, hlen; - char *key; + u8 *key; + + klen = strlen(volume_key); + if (klen > NAME_MAX) + return NULL; if (!coherency_data) coherency_len = 0; @@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, /* Stick the length on the front of the key and pad it out to make * hashing easier. */ - klen = strlen(volume_key); hlen = round_up(1 + klen + 1, sizeof(__le32)); key = kzalloc(hlen, GFP_KERNEL); if (!key) diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 337cb29a8dd5..8edd0f313515 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -53,9 +53,10 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) return acl; } -int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { + struct inode *inode = d_inode(dentry); struct fuse_conn *fc = get_fuse_conn(inode); const char *name; int ret; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bb97a384dc5d..25e6b0f7e73d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1935,7 +1935,7 @@ static const struct inode_operations fuse_dir_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_acl = fuse_get_acl, + .get_inode_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, @@ -1957,7 +1957,7 @@ static const struct inode_operations 
fuse_common_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_acl = fuse_get_acl, + .get_inode_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 97e2d815075d..c996c0ef8c63 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3001,6 +2997,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; } + err = file_modified(file); + if (err) + goto out; + if (!(mode & 
FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); @@ -3035,8 +3035,7 @@ out: if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 98a9cf531873..26a7c524eb70 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1269,7 +1269,7 @@ extern const struct xattr_handler *fuse_no_acl_xattr_handlers[]; struct posix_acl; struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu); -int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); /* readdir.c */ diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 61d8afcb10a3..fcce94ace2c2 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -255,7 +255,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ap.args.in_pages = true; err = -EFAULT; - iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); + iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) @@ -324,7 +324,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, goto out; err = -EFAULT; - iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); + iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index b4e565711045..e8deaacf1832 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, goto unlock; addr = kmap_local_page(page); - if 
(!offset) + if (!offset) { clear_page(addr); + SetPageUptodate(page); + } memcpy(addr + offset, dirent, reclen); kunmap_local(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; @@ -516,6 +518,12 @@ retry_locked: page = find_get_page_flags(file->f_mapping, index, FGP_ACCESSED | FGP_LOCK); + /* Page gone missing, then re-added to cache, but not initialized? */ + if (page && !PageUptodate(page)) { + unlock_page(page); + put_page(page); + page = NULL; + } spin_lock(&fi->rdc.lock); if (!page) { /* diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 734d1f05d823..3dcde4912413 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -109,9 +109,10 @@ out: return error; } -int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { + struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; bool need_unlock = false; diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index cd180ca7c959..b8de8c148f5c 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -13,7 +13,7 @@ extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 04a201584fa7..1371e067d2a7 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1997,7 +1997,7 @@ static int gfs2_setattr(struct user_namespace *mnt_userns, else { error = gfs2_setattr_simple(inode, attr); if (!error && attr->ia_valid & ATTR_MODE) - error = posix_acl_chmod(&init_user_ns, inode, + error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); } @@ -2149,7 +2149,7 @@ static const struct inode_operations 
gfs2_file_iops = { .getattr = gfs2_getattr, .listxattr = gfs2_listxattr, .fiemap = gfs2_fiemap, - .get_acl = gfs2_get_acl, + .get_inode_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, .update_time = gfs2_update_time, .fileattr_get = gfs2_fileattr_get, @@ -2171,7 +2171,7 @@ static const struct inode_operations gfs2_dir_iops = { .getattr = gfs2_getattr, .listxattr = gfs2_listxattr, .fiemap = gfs2_fiemap, - .get_acl = gfs2_get_acl, + .get_inode_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, .update_time = gfs2_update_time, .atomic_open = gfs2_atomic_open, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index c4526f16355d..a0746be3c1de 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -458,6 +458,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) + return -EIO; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) /* panic? */ diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c index 39f5e343bf4d..fdb0edb8a607 100644 --- a/fs/hfs/trans.c +++ b/fs/hfs/trans.c @@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr if (nls_io) { wchar_t ch; - while (srclen > 0) { + while (srclen > 0 && dstlen > 0) { size = nls_io->char2uni(src, srclen, &ch); if (size < 0) { ch = '?'; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a5db2e3b2980..6aa919e59483 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -198,6 +198,8 @@ struct hfsplus_sb_info { #define HFSPLUS_SB_HFSX 3 #define HFSPLUS_SB_CASEFOLD 4 #define HFSPLUS_SB_NOBARRIER 5 +#define HFSPLUS_SB_UID 6 +#define HFSPLUS_SB_GID 7 static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index aeab83ed1c9c..b675581aa9d0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -192,11 +192,11 @@ static void hfsplus_get_perms(struct inode *inode, mode = 
be16_to_cpu(perms->mode); i_uid_write(inode, be32_to_cpu(perms->owner)); - if (!i_uid_read(inode) && !mode) + if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode)) inode->i_uid = sbi->uid; i_gid_write(inode, be32_to_cpu(perms->group)); - if (!i_gid_read(inode) && !mode) + if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode)) inode->i_gid = sbi->gid; if (dir) { diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 047e05c57560..c94a58762ad6 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (!uid_valid(sbi->uid)) { pr_err("invalid uid specified\n"); return 0; + } else { + set_bit(HFSPLUS_SB_UID, &sbi->flags); } break; case opt_gid: @@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (!gid_valid(sbi->gid)) { pr_err("invalid gid specified\n"); return 0; + } else { + set_bit(HFSPLUS_SB_GID, &sbi->flags); } break; case opt_part: diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dd54f67e47fd..df7772335dc0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -328,6 +328,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { unlock_page(page); + if (PageHWPoison(page)) { + put_page(page); + retval = -EIO; + break; + } + /* * We have the page, copy it to user space buffer. 
*/ @@ -1111,13 +1117,6 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) { - struct inode *inode = mapping->host; - pgoff_t index = page->index; - - hugetlb_delete_from_page_cache(page); - if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) - hugetlb_fix_reserve_counts(inode); - return 0; } diff --git a/fs/inode.c b/fs/inode.c index 8c4078889754..5ccc61fe8a1f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2043,9 +2043,6 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) sync_it |= S_VERSION; - if (!sync_it) - return 0; - return sync_it; } diff --git a/fs/internal.h b/fs/internal.h index 5545c26d86ae..0c8812fe7ca4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -234,6 +234,27 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns, int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct xattr_ctx *ctx); +int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode); + +#ifdef CONFIG_FS_POSIX_ACL +int do_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, const void *kvalue, size_t size); +ssize_t do_get_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, void *kvalue, size_t size); +#else +static inline int do_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + const void *kvalue, size_t size) +{ + return -EOPNOTSUPP; +} +static inline ssize_t do_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + void *kvalue, size_t size) +{ + return -EOPNOTSUPP; +} +#endif ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 
e945e3484788..8bb58ce5c06c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -229,10 +229,11 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a return rc; } -int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int rc, xprefix; + struct inode *inode = d_inode(dentry); switch (type) { case ACL_TYPE_ACCESS: diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 9d9fb7cf093e..ca36a6eca594 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu); -int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int jffs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index c0aabbcbfd58..f399b390b5f6 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -62,7 +62,7 @@ const struct inode_operations jffs2_dir_inode_operations = .rmdir = jffs2_rmdir, .mknod = jffs2_mknod, .rename = jffs2_rename, - .get_acl = jffs2_get_acl, + .get_inode_acl = jffs2_get_acl, .set_acl = jffs2_set_acl, .setattr = jffs2_setattr, .listxattr = jffs2_listxattr, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index ba86acbe12d3..3cf71befa475 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -64,7 +64,7 @@ const struct file_operations jffs2_file_operations = const struct inode_operations jffs2_file_inode_operations = { - .get_acl = jffs2_get_acl, + .get_inode_acl = jffs2_get_acl, .set_acl = jffs2_set_acl, .setattr = jffs2_setattr, .listxattr = jffs2_listxattr, diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 39cec28096a7..66af51c41619 100644 --- a/fs/jffs2/fs.c +++ 
b/fs/jffs2/fs.c @@ -202,7 +202,7 @@ int jffs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, rc = jffs2_do_setattr(inode, iattr); if (!rc && (iattr->ia_valid & ATTR_MODE)) - rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode); + rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); return rc; } diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index a653f34c6e26..3b667eccc73b 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -94,12 +94,13 @@ out: return rc; } -int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int rc; tid_t tid; int update_mode = 0; + struct inode *inode = d_inode(dentry); umode_t mode = inode->i_mode; tid = txBegin(inode->i_sb, 0); diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 332dc9ac47a9..88663465aecd 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -123,7 +123,7 @@ int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) - rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode); + rc = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); return rc; } @@ -133,7 +133,7 @@ const struct inode_operations jfs_file_inode_operations = { .fileattr_get = jfs_fileattr_get, .fileattr_set = jfs_fileattr_set, #ifdef CONFIG_JFS_POSIX_ACL - .get_acl = jfs_get_acl, + .get_inode_acl = jfs_get_acl, .set_acl = jfs_set_acl, #endif }; diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 3de40286d31f..f0704a25835f 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -8,7 +8,7 @@ #ifdef CONFIG_JFS_POSIX_ACL struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu); -int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int jfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); diff --git 
a/fs/jfs/namei.c b/fs/jfs/namei.c index 9db4f5789c0e..b50afaf7966f 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1525,7 +1525,7 @@ const struct inode_operations jfs_dir_inode_operations = { .fileattr_get = jfs_fileattr_get, .fileattr_set = jfs_fileattr_set, #ifdef CONFIG_JFS_POSIX_ACL - .get_acl = jfs_get_acl, + .get_inode_acl = jfs_get_acl, .set_acl = jfs_set_acl, #endif }; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3990f3e270cb..f33b3baad07c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -31,10 +31,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool __kernfs_active(struct kernfs_node *kn) +{ + return atomic_read(&kn->active) >= 0; +} + static bool kernfs_active(struct kernfs_node *kn) { lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); - return atomic_read(&kn->active) >= 0; + return __kernfs_active(kn); } static bool kernfs_lockdep(struct kernfs_node *kn) @@ -705,7 +710,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, goto err_unlock; } - if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) + /* + * We should fail if @kn has never been activated and guarantee success + * if the caller knows that @kn is active. Both can be achieved by + * __kernfs_active() which tests @kn->active without kernfs_rwsem. 
+ */ + if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; spin_unlock(&kernfs_idr_lock); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index b2fc85d440d0..9306e10753f9 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2487,9 +2487,9 @@ static void ksmbd_acls_fattr(struct smb_fattr *fattr, fattr->cf_dacls = NULL; if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) { - fattr->cf_acls = get_acl(inode, ACL_TYPE_ACCESS); + fattr->cf_acls = get_inode_acl(inode, ACL_TYPE_ACCESS); if (S_ISDIR(inode->i_mode)) - fattr->cf_dacls = get_acl(inode, ACL_TYPE_DEFAULT); + fattr->cf_dacls = get_inode_acl(inode, ACL_TYPE_DEFAULT); } } @@ -2956,7 +2956,7 @@ int smb2_open(struct ksmbd_work *work) struct inode *inode = d_inode(path.dentry); posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns, - inode, + path.dentry, d_inode(path.dentry->d_parent)); if (posix_acl_rc) ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc); @@ -2972,7 +2972,7 @@ int smb2_open(struct ksmbd_work *work) if (rc) { if (posix_acl_rc) ksmbd_vfs_set_init_posix_acl(user_ns, - inode); + path.dentry); if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index b05ff9b146b5..ab5c68cc0e13 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -1289,7 +1289,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, } if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) { - posix_acls = get_acl(d_inode(path->dentry), ACL_TYPE_ACCESS); + posix_acls = get_inode_acl(d_inode(path->dentry), ACL_TYPE_ACCESS); if (posix_acls && !found) { unsigned int id = -1; @@ -1386,14 +1386,14 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry); /* Update posix acls */ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) { - rc = set_posix_acl(user_ns, inode, + rc = set_posix_acl(user_ns, path->dentry, ACL_TYPE_ACCESS, 
fattr.cf_acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); if (S_ISDIR(inode->i_mode) && fattr.cf_dacls) { - rc = set_posix_acl(user_ns, inode, + rc = set_posix_acl(user_ns, path->dentry, ACL_TYPE_DEFAULT, fattr.cf_dacls); if (rc) ksmbd_debug(SMB, diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 8de970d6146f..ff0e7a4fcd4d 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -321,7 +321,7 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end, unsigned char type) { struct file_lock *flock; - struct file_lock_context *ctx = file_inode(filp)->i_flctx; + struct file_lock_context *ctx = locks_inode_context(file_inode(filp)); int error = 0; if (!ctx || list_empty_careful(&ctx->flc_posix)) @@ -1321,7 +1321,7 @@ int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns, sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) || !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) { - err = ksmbd_vfs_remove_xattr(user_ns, dentry, name); + err = vfs_remove_acl(user_ns, dentry, name); if (err) ksmbd_debug(SMB, "remove acl xattr failed : %s\n", name); @@ -1375,7 +1375,7 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac if (!IS_ENABLED(CONFIG_FS_POSIX_ACL)) return NULL; - posix_acls = get_acl(inode, acl_type); + posix_acls = get_inode_acl(inode, acl_type); if (!posix_acls) return NULL; @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, - len, 0); + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, + COPY_FILE_SPLICE); if (ret < 0) return ret; @@ -1824,10 +1824,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock) } int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, - struct inode 
*inode) + struct dentry *dentry) { struct posix_acl_state acl_state; struct posix_acl *acls; + struct inode *inode = d_inode(dentry); int rc; if (!IS_ENABLED(CONFIG_FS_POSIX_ACL)) @@ -1856,14 +1857,13 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, return -ENOMEM; } posix_state_to_acl(&acl_state, acls->a_entries); - rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls); + rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); else if (S_ISDIR(inode->i_mode)) { posix_state_to_acl(&acl_state, acls->a_entries); - rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT, - acls); + rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); @@ -1874,16 +1874,17 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, } int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, - struct inode *inode, struct inode *parent_inode) + struct dentry *dentry, struct inode *parent_inode) { struct posix_acl *acls; struct posix_acl_entry *pace; + struct inode *inode = d_inode(dentry); int rc, i; if (!IS_ENABLED(CONFIG_FS_POSIX_ACL)) return -EOPNOTSUPP; - acls = get_acl(parent_inode, ACL_TYPE_DEFAULT); + acls = get_inode_acl(parent_inode, ACL_TYPE_DEFAULT); if (!acls) return -ENOENT; pace = acls->a_entries; @@ -1895,12 +1896,12 @@ int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, } } - rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls); + rc = set_posix_acl(user_ns, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", rc); if (S_ISDIR(inode->i_mode)) { - rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT, + rc = set_posix_acl(user_ns, dentry, ACL_TYPE_DEFAULT, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", diff --git a/fs/ksmbd/vfs.h 
b/fs/ksmbd/vfs.h index 593059ca8511..0d73d735cc39 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -160,8 +160,8 @@ int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns, struct dentry *dentry, struct xattr_dos_attrib *da); int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns, - struct inode *inode); + struct dentry *dentry); int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns, - struct inode *inode, + struct dentry *dentry, struct inode *parent_inode); #endif /* __KSMBD_VFS_H__ */ diff --git a/fs/libfs.c b/fs/libfs.c index 682d56345a1c..aada4e7c8713 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -995,8 +995,8 @@ out: EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ -ssize_t simple_attr_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) +static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) { struct simple_attr *attr; unsigned long long val; @@ -1017,7 +1017,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - ret = kstrtoull(attr->set_buf, 0, &val); + if (is_signed) + ret = kstrtoll(attr->set_buf, 0, &val); + else + ret = kstrtoull(attr->set_buf, 0, &val); if (ret) goto out; ret = attr->set(attr->data, val); @@ -1027,8 +1030,21 @@ out: mutex_unlock(&attr->mutex); return ret; } + +ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return simple_attr_write_xsigned(file, buf, len, ppos, false); +} EXPORT_SYMBOL_GPL(simple_attr_write); +ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return simple_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(simple_attr_write_signed); + /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file 
handle conversion on diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index e1c4617de771..720684345817 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -207,7 +207,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -262,7 +262,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; diff --git a/fs/locks.c b/fs/locks.c index 607f94a0e789..8f01bee17715 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) void *owner; void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if 
(!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; @@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2096,7 +2096,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) * throw a warning to let people know that they don't actually work. */ if (cmd & LOCK_MAND) { - pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n"); + pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). 
This support has been removed and the request ignored.\n", current->comm, current->pid); return 0; } @@ -2146,6 +2146,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { + WARN_ON_ONCE(filp != fl->fl_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); @@ -2295,6 +2296,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { + WARN_ON_ONCE(filp != fl->fl_file); if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else @@ -2561,7 +2563,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2634,7 +2636,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2663,12 +2665,36 @@ void locks_remove_file(struct file *filp) */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { + WARN_ON_ONCE(filp != fl->fl_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_cancel_lock); +/** + * vfs_inode_has_locks - are any file locks held on @inode? + * @inode: inode to check for locks + * + * Return true if there are any FL_POSIX or FL_FLOCK locks currently + * set on @inode. 
+ */ +bool vfs_inode_has_locks(struct inode *inode) +{ + struct file_lock_context *ctx; + bool ret; + + ctx = locks_inode_context(inode); + if (!ctx) + return false; + + spin_lock(&ctx->flc_lock); + ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); + spin_unlock(&ctx->flc_lock); + return ret; +} +EXPORT_SYMBOL_GPL(vfs_inode_has_locks); + #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -2839,7 +2865,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..7bfebfa993ed 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -297,13 +297,13 @@ static int check_acl(struct user_namespace *mnt_userns, acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); if (!acl) return -EAGAIN; - /* no ->get_acl() calls in RCU mode... */ + /* no ->get_inode_acl() calls in RCU mode... */ if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(mnt_userns, inode, acl, mask); } - acl = get_acl(inode, ACL_TYPE_ACCESS); + acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); diff --git a/fs/namespace.c b/fs/namespace.c index df137ba19d37..c80f422084eb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3515,8 +3515,9 @@ struct mnt_namespace 
*copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, q = next_mnt(q, new); if (!q) break; + // an mntns binding we'd skipped? while (p->mnt.mnt_root != q->mnt.mnt_root) - p = next_mnt(p, old); + p = next_mnt(skip_mnt_tree(p), old); } namespace_unlock(); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0ce535852151..7679a68e8193 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -17,9 +17,9 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct folio *folio; - unsigned int iopos, account = 0; pgoff_t start_page = rreq->start / PAGE_SIZE; pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + size_t account = 0; bool subreq_failed = false; XA_STATE(xas, &rreq->mapping->i_pages, start_page); @@ -39,18 +39,23 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) */ subreq = list_first_entry(&rreq->subrequests, struct netfs_io_subrequest, rreq_link); - iopos = 0; subreq_failed = (subreq->error < 0); trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; - unsigned int pgend = pgpos + folio_size(folio); + loff_t pg_end; bool pg_failed = false; + if (xas_retry(&xas, folio)) + continue; + + pg_end = folio_pos(folio) + folio_size(folio) - 1; + for (;;) { + loff_t sreq_end; + if (!subreq) { pg_failed = true; break; @@ -58,11 +63,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) folio_start_fscache(folio); pg_failed |= subreq_failed; - if (pgend < iopos + subreq->len) + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) break; account += subreq->transferred; - iopos += subreq->len; if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { subreq = list_next_entry(subreq, rreq_link); subreq_failed = (subreq->error < 0); @@ -70,7 +75,8 @@ void 
netfs_rreq_unlock_folios(struct netfs_io_request *rreq) subreq = NULL; subreq_failed = false; } - if (pgend == iopos) + + if (pg_end == sreq_end) break; } diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 428925899282..7f753380e047 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -23,7 +23,7 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq) { struct iov_iter iter; - iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages, + iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages, subreq->start + subreq->transferred, subreq->len - subreq->transferred); iov_iter_zero(iov_iter_count(&iter), &iter); @@ -49,7 +49,7 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq, struct iov_iter iter; netfs_stat(&netfs_n_rh_read); - iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, + iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start + subreq->transferred, subreq->len - subreq->transferred); @@ -121,6 +121,9 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + /* We might have multiple writes from the same huge * folio, but we mustn't unlock a folio more than once. 
*/ @@ -205,7 +208,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) continue; } - iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages, + iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, subreq->start, subreq->len); atomic_inc(&rreq->nr_copy_ops); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index da8da5cdbbc1..f50e025ae406 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -280,7 +280,7 @@ EXPORT_SYMBOL_GPL(nfs_put_client); static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) { struct nfs_client *clp; - const struct sockaddr *sap = data->addr; + const struct sockaddr *sap = (struct sockaddr *)data->addr; struct nfs_net *nn = net_generic(data->net, nfs_net_id); int error; @@ -666,7 +666,7 @@ static int nfs_init_server(struct nfs_server *server, struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { .hostname = ctx->nfs_server.hostname, - .addr = (const struct sockaddr *)&ctx->nfs_server.address, + .addr = &ctx->nfs_server._address, .addrlen = ctx->nfs_server.addrlen, .nfs_mod = ctx->nfs_mod, .proto = ctx->nfs_server.protocol, diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5c97cad741a7..cf7365581031 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -146,7 +146,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state { struct inode *inode = state->inode; struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct list_head *list; int status = 0; @@ -228,8 +228,7 @@ again: * */ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, - const nfs4_stateid *stateid, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit) { struct nfs_delegation *delegation; @@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, delegation = 
rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL) { spin_lock(&delegation->lock); - if (nfs4_is_valid_delegation(delegation, 0)) { - nfs4_stateid_copy(&delegation->stateid, stateid); - delegation->type = type; - delegation->pagemod_limit = pagemod_limit; - oldcred = delegation->cred; - delegation->cred = get_cred(cred); - clear_bit(NFS_DELEGATION_NEED_RECLAIM, - &delegation->flags); - spin_unlock(&delegation->lock); - rcu_read_unlock(); - put_cred(oldcred); - trace_nfs4_reclaim_delegation(inode, type); - return; - } - /* We appear to have raced with a delegation return. */ + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; + oldcred = delegation->cred; + delegation->cred = get_cred(cred); + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, + &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); spin_unlock(&delegation->lock); + rcu_read_unlock(); + put_cred(oldcred); + trace_nfs4_reclaim_delegation(inode, type); + } else { + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, type, stateid, + pagemod_limit); } - rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 58036f657126..f594dac436a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2489,9 +2489,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); goto out; } - if (dentry->d_fsdata) - /* old devname */ - kfree(dentry->d_fsdata); + /* old devname */ + kfree(dentry->d_fsdata); dentry->d_fsdata = NFS_FSDATA_BLOCKED; spin_unlock(&dentry->d_lock); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index e87d500ad95a..6603b5cee029 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -16,8 +16,9 @@ #include 
"dns_resolve.h" ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, - struct sockaddr *sa, size_t salen) + struct sockaddr_storage *ss, size_t salen) { + struct sockaddr *sa = (struct sockaddr *)ss; ssize_t ret; char *ip_addr = NULL; int ip_len; @@ -341,7 +342,7 @@ out: } ssize_t nfs_dns_resolve_name(struct net *net, char *name, - size_t namelen, struct sockaddr *sa, size_t salen) + size_t namelen, struct sockaddr_storage *ss, size_t salen) { struct nfs_dns_ent key = { .hostname = name, @@ -354,7 +355,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item); if (ret == 0) { if (salen >= item->addrlen) { - memcpy(sa, &item->addr, item->addrlen); + memcpy(ss, &item->addr, item->addrlen); ret = item->addrlen; } else ret = -EOVERFLOW; diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index 576ff4b54c82..fe3b172c4de1 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h @@ -32,6 +32,6 @@ extern void nfs_dns_resolver_cache_destroy(struct net *net); #endif extern ssize_t nfs_dns_resolve_name(struct net *net, char *name, - size_t namelen, struct sockaddr *sa, size_t salen); + size_t namelen, struct sockaddr_storage *sa, size_t salen); #endif diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 4da701fd1424..09833ec102fc 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -273,9 +273,9 @@ static const struct constant_table nfs_secflavor_tokens[] = { * Address family must be initialized, and address must not be * the ANY address for that family. 
*/ -static int nfs_verify_server_address(struct sockaddr *addr) +static int nfs_verify_server_address(struct sockaddr_storage *addr) { - switch (addr->sa_family) { + switch (addr->ss_family) { case AF_INET: { struct sockaddr_in *sa = (struct sockaddr_in *)addr; return sa->sin_addr.s_addr != htonl(INADDR_ANY); @@ -969,7 +969,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc, { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_fh *mntfh = ctx->mntfh; - struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + struct sockaddr_storage *sap = &ctx->nfs_server._address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; int ret; @@ -1044,7 +1044,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc, memcpy(sap, &data->addr, sizeof(data->addr)); ctx->nfs_server.addrlen = sizeof(data->addr); ctx->nfs_server.port = ntohs(data->addr.sin_port); - if (sap->sa_family != AF_INET || + if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; @@ -1200,7 +1200,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc, struct nfs4_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + struct sockaddr_storage *sap = &ctx->nfs_server._address; int ret; char *c; @@ -1314,7 +1314,7 @@ static int nfs_fs_context_validate(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_subversion *nfs_mod; - struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + struct sockaddr_storage *sap = &ctx->nfs_server._address; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; int port = 0; @@ -1540,7 +1540,7 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->version = nfss->nfs_client->rpc_ops->version; ctx->minorversion = nfss->nfs_client->cl_minorversion; - memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, + memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr, 
ctx->nfs_server.addrlen); if (fc->net_ns != net) { diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index e861d7bae305..e731c00a9fcb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -252,7 +252,7 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page) bvec[0].bv_page = page; bvec[0].bv_offset = 0; bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); ret = fscache_begin_read_operation(&cres, cookie); if (ret < 0) @@ -282,7 +282,7 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page, bvec[0].bv_page = page; bvec[0].bv_offset = 0; bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); ret = fscache_begin_write_operation(&cres, cookie); if (ret < 0) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d914d609b85b..647fc3f547cb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -69,7 +69,7 @@ static inline fmode_t flags_to_mode(int flags) struct nfs_client_initdata { unsigned long init_flags; const char *hostname; /* Hostname of the server */ - const struct sockaddr *addr; /* Address of the server */ + const struct sockaddr_storage *addr; /* Address of the server */ const char *nodename; /* Hostname of the client */ const char *ip_addr; /* IP address of the client */ size_t addrlen; @@ -180,7 +180,7 @@ static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) /* mount_clnt.c */ struct nfs_mount_request { - struct sockaddr *sap; + struct sockaddr_storage *sap; size_t salen; char *hostname; char *dirpath; @@ -223,7 +223,7 @@ extern void nfs4_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct 
nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen, + struct sockaddr_storage *sap, size_t salen, struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, @@ -235,7 +235,7 @@ extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, - const struct sockaddr *ds_addr, + const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, @@ -243,7 +243,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, - const struct sockaddr *ds_addr, int ds_addrlen, + const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans); #ifdef CONFIG_PROC_FS @@ -894,13 +894,13 @@ static inline bool nfs_error_is_fatal_on_server(int err) * Select between a default port value and a user-specified port value. * If a zero value is set, then autobind will be used. 
*/ -static inline void nfs_set_port(struct sockaddr *sap, int *port, +static inline void nfs_set_port(struct sockaddr_storage *sap, int *port, const unsigned short default_port) { if (*port == NFS_UNSPEC_PORT) *port = default_port; - rpc_set_port(sap, *port); + rpc_set_port((struct sockaddr *)sap, *port); } struct nfs_direct_req { diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index c5e3b6b3366a..68e76b626371 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -158,7 +158,7 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans) struct rpc_create_args args = { .net = info->net, .protocol = info->protocol, - .address = info->sap, + .address = (struct sockaddr *)info->sap, .addrsize = info->salen, .timeout = &mnt_timeout, .servername = info->hostname, @@ -245,7 +245,7 @@ void nfs_umount(const struct nfs_mount_request *info) struct rpc_create_args args = { .net = info->net, .protocol = IPPROTO_UDP, - .address = info->sap, + .address = (struct sockaddr *)info->sap, .addrsize = info->salen, .timeout = &nfs_umnt_timeout, .servername = info->hostname, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 3295af4110f1..2f336ace7555 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -175,7 +175,7 @@ struct vfsmount *nfs_d_automount(struct path *path) } /* for submounts we want the same server; referrals will reassign */ - memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen); + memcpy(&ctx->nfs_server._address, &client->cl_addr, client->cl_addrlen); ctx->nfs_server.addrlen = client->cl_addrlen; ctx->nfs_server.port = server->port; diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 03a4e679fd99..df9ca56db347 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -12,7 +12,7 @@ */ #ifdef CONFIG_NFS_V3_ACL extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu); -extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +extern int nfs3_set_acl(struct 
user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 93de0b58647a..74d11e3c4205 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -255,23 +255,24 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } -int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { struct posix_acl *orig = acl, *dfacl = NULL, *alloc; + struct inode *inode = d_inode(dentry); int status; if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_inode_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; dfacl = alloc; break; case ACL_TYPE_DEFAULT: - alloc = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; dfacl = acl; @@ -312,7 +313,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, struct posix_acl *acl; char *p = data + *result; - acl = get_acl(inode, type); + acl = get_inode_acl(inode, type); if (IS_ERR_OR_NULL(acl)) return 0; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b49359afac88..669cda757a5c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -78,7 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, * the MDS. 
*/ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, - const struct sockaddr *ds_addr, int ds_addrlen, + const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct rpc_timeout ds_timeout; @@ -98,7 +98,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, char buf[INET6_ADDRSTRLEN + 1]; /* fake a hostname because lockd wants it */ - if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) + if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0) return ERR_PTR(-EINVAL); cl_init.hostname = buf; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2e7579626cf0..4bf208a0a8e9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -998,7 +998,7 @@ static const struct inode_operations nfs3_dir_inode_operations = { .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, - .get_acl = nfs3_get_acl, + .get_inode_acl = nfs3_get_acl, .set_acl = nfs3_set_acl, #endif }; @@ -1009,7 +1009,7 @@ static const struct inode_operations nfs3_file_inode_operations = { .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, - .get_acl = nfs3_get_acl, + .get_inode_acl = nfs3_get_acl, .set_acl = nfs3_set_acl, #endif }; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 13424f0d793b..ecb428512fe1 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1093,6 +1093,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, &args.seq_args, &res.seq_res, 0); trace_nfs4_clone(src_inode, dst_inode, &args, status); if (status == 0) { + /* a zero-length count means clone to EOF in src */ + if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE) + count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset; nfs42_copy_dest_done(dst_inode, dst_offset, count); status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 400a71e75238..cfef738d765e 100644 --- 
a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -281,7 +281,7 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); -size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss, size_t salen, struct net *net, int port); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 7a5162afa5c0..d3051b051a56 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp) ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, "NFSv4.0 transport Slot table"); if (ret) { + nfs4_shutdown_slot_table(tbl); kfree(tbl); return ret; } @@ -889,7 +890,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, */ static int nfs4_set_client(struct nfs_server *server, const char *hostname, - const struct sockaddr *addr, + const struct sockaddr_storage *addr, const size_t addrlen, const char *ip_addr, int proto, const struct rpc_timeout *timeparms, @@ -924,7 +925,7 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); - server->port = rpc_get_port(addr); + server->port = rpc_get_port((struct sockaddr *)addr); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); @@ -960,7 +961,7 @@ static int nfs4_set_client(struct nfs_server *server, * the MDS. 
*/ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, - const struct sockaddr *ds_addr, int ds_addrlen, + const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, u32 minor_version) { @@ -980,7 +981,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, }; char buf[INET6_ADDRSTRLEN + 1]; - if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) + if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0) return ERR_PTR(-EINVAL); cl_init.hostname = buf; @@ -1148,7 +1149,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) /* Get a client record */ error = nfs4_set_client(server, ctx->nfs_server.hostname, - &ctx->nfs_server.address, + &ctx->nfs_server._address, ctx->nfs_server.addrlen, ctx->client_address, ctx->nfs_server.protocol, @@ -1238,7 +1239,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); error = nfs4_set_client(server, ctx->nfs_server.hostname, - &ctx->nfs_server.address, + &ctx->nfs_server._address, ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_RDMA, @@ -1254,7 +1255,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) rpc_set_port(&ctx->nfs_server.address, NFS_PORT); error = nfs4_set_client(server, ctx->nfs_server.hostname, - &ctx->nfs_server.address, + &ctx->nfs_server._address, ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_TCP, @@ -1303,14 +1304,14 @@ error: * Returns zero on success, or a negative errno value. 
*/ int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen, struct net *net) + struct sockaddr_storage *sap, size_t salen, struct net *net) { struct nfs_client *clp = server->nfs_client; struct rpc_clnt *clnt = server->client; struct xprt_create xargs = { .ident = clp->cl_proto, .net = net, - .dstaddr = sap, + .dstaddr = (struct sockaddr *)sap, .addrlen = salen, .servername = hostname, }; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index f2dbf904c598..9a98595bb160 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -164,16 +164,17 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss, size_t salen, struct net *net, int port) { + struct sockaddr *sa = (struct sockaddr *)ss; ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { ret = rpc_uaddr2sockaddr(net, string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(net, string, len, sa, salen); + ret = nfs_dns_resolve_name(net, string, len, ss, salen); if (ret < 0) ret = 0; } @@ -331,7 +332,7 @@ static int try_location(struct fs_context *fc, ctx->nfs_server.addrlen = nfs_parse_server_name(buf->data, buf->len, - &ctx->nfs_server.address, + &ctx->nfs_server._address, sizeof(ctx->nfs_server._address), fc->net_ns, 0); if (ctx->nfs_server.addrlen == 0) @@ -483,14 +484,13 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, char *page, char *page2, const struct nfs4_fs_location *location) { - const size_t addr_bufsize = sizeof(struct sockaddr_storage); struct net *net = rpc_net_ns(server->client); - struct sockaddr *sap; + struct sockaddr_storage *sap; unsigned int s; size_t salen; int error; - sap = kmalloc(addr_bufsize, GFP_KERNEL); + sap = kmalloc(sizeof(*sap), GFP_KERNEL); if (sap == NULL) return -ENOMEM; @@ 
-506,10 +506,10 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, net, 0); + sap, sizeof(*sap), net, 0); if (salen == 0) continue; - rpc_set_port(sap, NFS_PORT); + rpc_set_port((struct sockaddr *)sap, NFS_PORT); error = -ENOMEM; hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e2efcd26336c..86ed5c0142c3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3951,7 +3951,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, for (i = 0; i < location->nservers; i++) { struct nfs4_string *srv_loc = &location->servers[i]; - struct sockaddr addr; + struct sockaddr_storage addr; size_t addrlen; struct xprt_create xprt_args = { .ident = 0, @@ -3974,7 +3974,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, clp->cl_net, server->port); if (!addrlen) return; - xprt_args.dstaddr = &addr; + xprt_args.dstaddr = (struct sockaddr *)&addr; xprt_args.addrlen = addrlen; servername = kmalloc(srv_loc->len + 1, GFP_KERNEL); if (!servername) @@ -7138,6 +7138,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; + struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7145,8 +7146,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), - data->timestamp); + renew_lease(server, data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -7167,6 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if 
(!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; + else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) + goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c3503fb26fa2..dd18344648f3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1501,7 +1501,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct file_lock *fl; struct nfs4_lock_state *lsp; int status = 0; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct list_head *list; if (flctx == NULL) @@ -1786,6 +1786,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) { + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); @@ -2670,6 +2671,7 @@ static void nfs4_state_manager(struct nfs_client *clp) if (status < 0) goto out_error; nfs4_state_end_reclaim_reboot(clp); + continue; } /* Detect expired delegations... 
*/ diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 317cedfa52bf..16be6dae524f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1055,7 +1055,7 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev, if (prev) { if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev))) return 0; - flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx; + flctx = locks_inode_context(d_inode(nfs_req_openctx(req)->dentry)); if (flctx != NULL && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock)) && diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 987c88ddeaf0..5d035dd2d7bf 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -821,7 +821,7 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) static struct nfs_client *(*get_v3_ds_connect)( struct nfs_server *mds_srv, - const struct sockaddr *ds_addr, + const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, @@ -882,7 +882,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, continue; } clp = get_v3_ds_connect(mds_srv, - (struct sockaddr *)&da->da_addr, + &da->da_addr, da->da_addrlen, da->da_transport, timeo, retrans); if (IS_ERR(clp)) @@ -951,7 +951,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, put_cred(xprtdata.cred); } else { clp = nfs4_set_ds_client(mds_srv, - (struct sockaddr *)&da->da_addr, + &da->da_addr, da->da_addrlen, da->da_transport, timeo, retrans, minor_version); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ee66ffdb985e..05ae23657527 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -822,8 +822,7 @@ static int nfs_request_mount(struct fs_context *fc, { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_mount_request request = { - .sap = (struct sockaddr *) - &ctx->mount_server.address, + .sap = &ctx->mount_server._address, .dirpath = ctx->nfs_server.export_path, .protocol = ctx->mount_server.protocol, .fh = root_fh, @@ -854,7 +853,7 @@ 
static int nfs_request_mount(struct fs_context *fc, * Construct the mount server's address. */ if (ctx->mount_server.address.sa_family == AF_UNSPEC) { - memcpy(request.sap, &ctx->nfs_server.address, + memcpy(request.sap, &ctx->nfs_server._address, ctx->nfs_server.addrlen); ctx->mount_server.addrlen = ctx->nfs_server.addrlen; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f41d24b54fd1..80c240e50952 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1185,7 +1185,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; - struct file_lock_context *flctx = file_inode(file)->i_flctx; + struct file_lock_context *flctx = locks_inode_context(file_inode(file)); struct nfs_page *req; int do_flush, status; /* @@ -1321,7 +1321,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode, unsigned int pagelen) { int ret; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct file_lock *fl; if (file->f_flags & O_DSYNC) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 29a62db155fb..ec3fceb92236 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -893,9 +893,8 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (net && nf->nf_net != net) - continue; - nfsd_file_unhash_and_dispose(nf, &dispose); + if (!net || nf->nf_net == net) + nfsd_file_unhash_and_dispose(nf, &dispose); nf = rhashtable_walk_next(&iter); } @@ -1077,6 +1076,7 @@ retry: goto open_file; nfsd_file_slab_free(&nf->nf_rcu); + nf = NULL; if (ret == -EEXIST) goto retry; trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 13e6e6897f6c..c43c25a8da2e 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -55,7 +55,7 @@ static __be32 nfsacld_proc_getacl(struct 
svc_rqst *rqstp) goto out; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = get_acl(inode, ACL_TYPE_ACCESS); + acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); @@ -69,7 +69,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp) if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - acl = get_acl(inode, ACL_TYPE_DEFAULT); + acl = get_inode_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { resp->status = nfserrno(PTR_ERR(acl)); goto fail; @@ -113,11 +113,11 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, + error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT, + error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); if (error) goto out_drop_lock; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 2fb9ee356455..9daa621817d8 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -47,7 +47,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) resp->mask = argp->mask; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = get_acl(inode, ACL_TYPE_ACCESS); + acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); @@ -61,7 +61,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! 
*/ - acl = get_acl(inode, ACL_TYPE_DEFAULT); + acl = get_inode_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { resp->status = nfserrno(PTR_ERR(acl)); goto fail; @@ -103,11 +103,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, + error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT, + error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); out_drop_lock: diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index bb8e2f6d7d03..518203821790 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -135,7 +135,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, unsigned int flags = 0; int size = 0; - pacl = get_acl(inode, ACL_TYPE_ACCESS); + pacl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (!pacl) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); @@ -147,7 +147,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, if (S_ISDIR(inode->i_mode)) { flags = NFS4_ACL_DIR; - dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + dpacl = get_inode_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(dpacl)) { error = PTR_ERR(dpacl); goto rel_pacl; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4e718500a00c..da8d0ea66229 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4758,7 +4758,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) static bool nfsd4_deleg_present(const struct inode *inode) { - struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } @@ -5382,6 +5382,7 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, if (err) return -EAGAIN; + exp_put(exp); dput(child); if (child != 
file_dentry(fp->fi_deleg_file->nf_file)) return -EAGAIN; @@ -5896,7 +5897,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; - ctx = nf->fi_inode->i_flctx; + ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) @@ -7712,7 +7713,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6a29bcfc9390..dc74a947a440 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1458,12 +1458,14 @@ static __net_init int nfsd_init_net(struct net *net) goto out_drc_error; retval = nfsd_reply_cache_init(nn); if (retval) - goto out_drc_error; + goto out_cache_error; get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); return 0; +out_cache_error: + nfsd4_leases_net_shutdown(nn); out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index d73434200df9..8c52b6c9d31a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -392,8 +392,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) skip_pseudoflavor_check: /* Finally, check access permissions. 
*/ error = nfsd_permission(rqstp, exp, dentry, access); - trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); out: + trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) nfsd_stats_fh_stale_inc(exp); return error; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 06a96e955bd0..d4b6839bb459 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err, rqstp->rq_xprt->xpt_remotelen); __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); - __entry->inode = d_inode(fhp->fh_dentry); + if (fhp->fh_dentry) + __entry->inode = d_inode(fhp->fh_dentry); + else + __entry->inode = NULL; __entry->type = type; __entry->access = access; __entry->error = be32_to_cpu(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f650afedd67f..08a929607641 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -480,12 +480,12 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, attr->na_seclabel->data, attr->na_seclabel->len); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) attr->na_aclerr = set_posix_acl(&init_user_ns, - inode, ACL_TYPE_ACCESS, + dentry, ACL_TYPE_ACCESS, attr->na_pacl); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) attr->na_aclerr = set_posix_acl(&init_user_ns, - inode, ACL_TYPE_DEFAULT, + dentry, ACL_TYPE_DEFAULT, attr->na_dpacl); inode_unlock(inode); if (size_change) @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } @@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page 
*page = buf->page; // may be a compound one unsigned offset = buf->offset; + struct page *last_page; - page += offset / PAGE_SIZE; - for (int i = sd->len; i > 0; i -= PAGE_SIZE) - svc_rqst_replace_page(rqstp, page++); + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) + svc_rqst_replace_page(rqstp, page); if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; @@ -942,7 +943,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, ssize_t host_err; trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, READ, vec, vlen, *count); + iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count); host_err = vfs_iter_read(file, &iter, &ppos, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1032,7 +1033,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (stable && !use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); + iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index b4cebad21b48..3335ef352915 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) struct the_nilfs *nilfs = 
sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; - if (!sci || !sci->sc_flush_request) + if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request) return; set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); @@ -2242,7 +2242,7 @@ int nilfs_construct_segment(struct super_block *sb) struct nilfs_sc_info *sci = nilfs->ns_writer; struct nilfs_transaction_info *ti; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; /* A call inside transactions causes a deadlock. */ @@ -2280,7 +2280,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, struct nilfs_transaction_info ti; int err = 0; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; nilfs_transaction_lock(sb, &ti, 0); @@ -2776,11 +2776,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (nilfs->ns_writer) { /* - * This happens if the filesystem was remounted - * read/write after nilfs_error degenerated it into a - * read-only mount. + * This happens if the filesystem is made read-only by + * __nilfs_error or nilfs_remount and then remounted + * read/write. In these cases, reuse the existing + * writer. 
*/ - nilfs_detach_log_writer(sb); + return 0; } nilfs->ns_writer = nilfs_segctor_new(sb, root); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 77ff8e95421f..dc359b56fdfa 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ba108f915391..6edb6e0dd61f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & SB_RDONLY) { - /* Shutting down log writer */ - nilfs_detach_log_writer(sb); sb->s_flags |= SB_RDONLY; /* diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 3b4a079c9617..2064e6473d30 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -13,6 +13,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/random.h> +#include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" @@ -193,6 +194,34 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, } /** + * nilfs_get_blocksize - get block size from raw superblock data + * @sb: super block instance + * @sbp: superblock raw data buffer + * @blocksize: place to store block size + * + * nilfs_get_blocksize() calculates the block size from the block size + * exponent 
information written in @sbp and stores it in @blocksize, + * or aborts with an error message if it's too large. + * + * Return Value: On success, 0 is returned. If the block size is too + * large, -EINVAL is returned. + */ +static int nilfs_get_blocksize(struct super_block *sb, + struct nilfs_super_block *sbp, int *blocksize) +{ + unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); + + if (unlikely(shift_bits > + ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) { + nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB", + shift_bits); + return -EINVAL; + } + *blocksize = BLOCK_SIZE << shift_bits; + return 0; +} + +/** * load_nilfs - load and recover the nilfs * @nilfs: the_nilfs structure to be released * @sb: super block instance used to recover past segment @@ -245,11 +274,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); /* verify consistency between two super blocks */ - blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); + err = nilfs_get_blocksize(sb, sbp[0], &blocksize); + if (err) + goto scan_error; + if (blocksize != nilfs->ns_blocksize) { nilfs_warn(sb, "blocksize differs between two super blocks (%d != %d)", blocksize, nilfs->ns_blocksize); + err = -EINVAL; goto scan_error; } @@ -443,11 +476,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) return crc == le32_to_cpu(sbp->s_sum); } -static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) +/** + * nilfs_sb2_bad_offset - check the location of the second superblock + * @sbp: superblock raw data buffer + * @offset: byte offset of second superblock calculated from device size + * + * nilfs_sb2_bad_offset() checks if the position on the second + * superblock is valid or not based on the filesystem parameters + * stored in @sbp. If @offset points to a location within the segment + * area, or if the parameters themselves are not normal, it is + * determined to be invalid. 
+ * + * Return Value: true if invalid, false if valid. + */ +static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) { - return offset < ((le64_to_cpu(sbp->s_nsegments) * - le32_to_cpu(sbp->s_blocks_per_segment)) << - (le32_to_cpu(sbp->s_log_block_size) + 10)); + unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); + u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + u64 nsegments = le64_to_cpu(sbp->s_nsegments); + u64 index; + + if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS || + shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS) + return true; + + index = offset >> (shift_bits + BLOCK_SIZE_BITS); + do_div(index, blocks_per_segment); + return index < nsegments; } static void nilfs_release_super_block(struct the_nilfs *nilfs) @@ -586,9 +641,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); - if (blocksize < NILFS_MIN_BLOCK_SIZE || - blocksize > NILFS_MAX_BLOCK_SIZE) { + err = nilfs_get_blocksize(sb, sbp, &blocksize); + if (err) + goto failed_sbh; + + if (blocksize < NILFS_MIN_BLOCK_SIZE) { nilfs_err(sb, "couldn't mount because of unsupported filesystem blocksize %d", blocksize); @@ -690,9 +747,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { unsigned long ncleansegs; - down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 4f2ffc7ef296..c5e4a886593d 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -802,7 +802,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, setattr_copy(mnt_userns, inode, attr); if (mode != inode->i_mode) { - err = ntfs_acl_chmod(mnt_userns, inode); + err = ntfs_acl_chmod(mnt_userns, 
dentry); if (err) goto out; @@ -1255,7 +1255,7 @@ const struct inode_operations ntfs_file_inode_operations = { .setattr = ntfs3_setattr, .listxattr = ntfs_listxattr, .permission = ntfs_permission, - .get_acl = ntfs_get_acl, + .get_inode_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, .fiemap = ntfs_fiemap, }; diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index bc22cc321a74..053cc0e0f8b5 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -367,7 +367,7 @@ const struct inode_operations ntfs_dir_inode_operations = { .mknod = ntfs_mknod, .rename = ntfs_rename, .permission = ntfs_permission, - .get_acl = ntfs_get_acl, + .get_inode_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, .setattr = ntfs3_setattr, .getattr = ntfs_getattr, @@ -379,7 +379,7 @@ const struct inode_operations ntfs_special_inode_operations = { .setattr = ntfs3_setattr, .getattr = ntfs_getattr, .listxattr = ntfs_listxattr, - .get_acl = ntfs_get_acl, + .get_inode_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, }; // clang-format on diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 2c791222c4e2..a4d292809a33 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -843,7 +843,7 @@ int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2, /* globals from xattr.c */ #ifdef CONFIG_NTFS3_FS_POSIX_ACL struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu); -int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, struct inode *dir); @@ -852,7 +852,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, #define ntfs_set_acl NULL #endif -int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode); +int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry); int ntfs_permission(struct user_namespace *mnt_userns, 
struct inode *inode, int mask); ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size); diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 7de8718c68a9..aafe98ee0b21 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -619,10 +619,10 @@ out: /* * ntfs_set_acl - inode_operations::set_acl */ -int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ntfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { - return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); + return ntfs_set_acl_ex(mnt_userns, d_inode(dentry), acl, type, false); } /* @@ -664,8 +664,9 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, /* * ntfs_acl_chmod - Helper for ntfs3_setattr(). */ -int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode) +int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry) { + struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; if (!(sb->s_flags & SB_POSIXACL)) @@ -674,7 +675,7 @@ int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode) if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - return posix_acl_chmod(mnt_userns, inode, inode->i_mode); + return posix_acl_chmod(mnt_userns, dentry, inode->i_mode); } /* diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 23a72a423955..9f19cf9a5a9f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -260,12 +260,13 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { struct buffer_head *bh = NULL; int status, had_lock; struct ocfs2_lock_holder oh; + struct inode *inode = d_inode(dentry); had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); if (had_lock < 0) diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 
95a57c888ab6..a897c4e41b26 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -17,7 +17,7 @@ struct ocfs2_acl_entry { }; struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu); -int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index b13d344d40b6..60b97c92e2b2 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -335,7 +335,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg) /* negotiate timeout must be less than write timeout. */ schedule_delayed_work(&reg->hr_nego_timeout_work, msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS)); - memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap)); + bitmap_zero(reg->hr_nego_node_bitmap, O2NM_MAX_NODES); } static void o2hb_disarm_timeout(struct o2hb_region *reg) @@ -375,7 +375,7 @@ static void o2hb_nego_timeout(struct work_struct *work) if (reg->hr_last_hb_status) return; - o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); + o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES); /* lowest node as master node to make negotiate decision. */ master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES); @@ -386,8 +386,8 @@ static void o2hb_nego_timeout(struct work_struct *work) config_item_name(&reg->hr_item), reg->hr_bdev); set_bit(master_node, reg->hr_nego_node_bitmap); } - if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, - sizeof(reg->hr_nego_node_bitmap))) { + if (!bitmap_equal(reg->hr_nego_node_bitmap, live_node_bitmap, + O2NM_MAX_NODES)) { /* check negotiate bitmap every second to do timeout * approve decision.
*/ @@ -856,8 +856,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg) * live nodes heartbeat on it. In other words, the region has been * added to all nodes. */ - if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, - sizeof(o2hb_live_node_bitmap))) + if (!bitmap_equal(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, + O2NM_MAX_NODES)) goto unlock; printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n", @@ -1087,7 +1087,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) * If a node is not configured but is in the livemap, we still need * to read the slot so as to be able to remove it from the livemap. */ - o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); + o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES); i = -1; while ((i = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { @@ -1437,11 +1437,11 @@ void o2hb_init(void) for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++) INIT_LIST_HEAD(&o2hb_live_slots[i]); - memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); - memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); - memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); - memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); - memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); + bitmap_zero(o2hb_live_node_bitmap, O2NM_MAX_NODES); + bitmap_zero(o2hb_region_bitmap, O2NM_MAX_REGIONS); + bitmap_zero(o2hb_live_region_bitmap, O2NM_MAX_REGIONS); + bitmap_zero(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS); + bitmap_zero(o2hb_failed_region_bitmap, O2NM_MAX_REGIONS); o2hb_dependent_users = 0; @@ -1450,23 +1450,21 @@ void o2hb_init(void) /* if we're already in a callback then we're already serialized by the sem */ static void o2hb_fill_node_map_from_callback(unsigned long *map, - unsigned bytes) + unsigned int bits) { - BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); - - memcpy(map, 
&o2hb_live_node_bitmap, bytes); + bitmap_copy(map, o2hb_live_node_bitmap, bits); } /* * get a map of all nodes that are heartbeating in any regions */ -void o2hb_fill_node_map(unsigned long *map, unsigned bytes) +void o2hb_fill_node_map(unsigned long *map, unsigned int bits) { /* callers want to serialize this map and callbacks so that they * can trust that they don't miss nodes coming to the party */ down_read(&o2hb_callback_sem); spin_lock(&o2hb_live_lock); - o2hb_fill_node_map_from_callback(map, bytes); + o2hb_fill_node_map_from_callback(map, bits); spin_unlock(&o2hb_live_lock); up_read(&o2hb_callback_sem); } @@ -2460,7 +2458,7 @@ int o2hb_check_node_heartbeating_no_sem(u8 node_num) unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; spin_lock(&o2hb_live_lock); - o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); + o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES); spin_unlock(&o2hb_live_lock); if (!test_bit(node_num, testing_map)) { mlog(ML_HEARTBEAT, @@ -2477,7 +2475,7 @@ int o2hb_check_node_heartbeating_from_callback(u8 node_num) { unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); + o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES); if (!test_bit(node_num, testing_map)) { mlog(ML_HEARTBEAT, "node (%u) does not have heartbeating enabled.\n", diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 1d4100abf6f8..8ef8c1b9eeb7 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -59,7 +59,7 @@ int o2hb_register_callback(const char *region_uuid, void o2hb_unregister_callback(const char *region_uuid, struct o2hb_callback_func *hc); void o2hb_fill_node_map(unsigned long *map, - unsigned bytes); + unsigned int bits); void o2hb_exit(void); void o2hb_init(void); int o2hb_check_node_heartbeating_no_sem(u8 node_num); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 
7524994e3199..35c05c18de59 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -438,7 +438,7 @@ static int o2net_fill_bitmap(char *buf, int len) unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; int i = -1, out = 0; - o2net_fill_node_map(map, sizeof(map)); + o2net_fill_node_map(map, O2NM_MAX_NODES); while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 27fee68f860a..2f61d39e4e50 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -54,7 +54,7 @@ int o2nm_configured_node_map(unsigned long *map, unsigned bytes) return -EINVAL; read_lock(&cluster->cl_nodes_lock); - memcpy(map, cluster->cl_nodes_bitmap, sizeof(cluster->cl_nodes_bitmap)); + bitmap_copy(map, cluster->cl_nodes_bitmap, O2NM_MAX_NODES); read_unlock(&cluster->cl_nodes_lock); return 0; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index f660c0dbdb63..37d222bdfc8c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -900,7 +900,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) { struct kvec vec = { .iov_len = len, .iov_base = data, }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; - iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, len); return sock_recvmsg(sock, &msg, MSG_DONTWAIT); } @@ -990,14 +990,12 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, } /* Get a map of all nodes to which this node is currently connected to */ -void o2net_fill_node_map(unsigned long *map, unsigned bytes) +void o2net_fill_node_map(unsigned long *map, unsigned int bits) { struct o2net_sock_container *sc; int node, ret; - BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); - - memset(map, 0, bytes); + bitmap_zero(map, bits); for (node = 0; node < O2NM_MAX_NODES; ++node) { if 
(!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret)) continue; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index fd2022712167..20f790a47484 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1094,7 +1094,7 @@ static inline enum dlm_status dlm_err_to_dlm_status(int err) static inline void dlm_node_iter_init(unsigned long *map, struct dlm_node_iter *iter) { - memcpy(iter->node_map, map, sizeof(iter->node_map)); + bitmap_copy(iter->node_map, map, O2NM_MAX_NODES); iter->curnode = -1; } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index c4eccd499db8..5c04dde99981 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1576,8 +1576,8 @@ static int dlm_should_restart_join(struct dlm_ctxt *dlm, spin_lock(&dlm->spinlock); /* For now, we restart the process if the node maps have * changed at all */ - ret = memcmp(ctxt->live_map, dlm->live_nodes_map, - sizeof(dlm->live_nodes_map)); + ret = !bitmap_equal(ctxt->live_map, dlm->live_nodes_map, + O2NM_MAX_NODES); spin_unlock(&dlm->spinlock); if (ret) @@ -1604,13 +1604,11 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) /* group sem locking should work for us here -- we're already * registered for heartbeat events so filling this should be * atomic wrt getting those handlers called. */ - o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); + o2hb_fill_node_map(dlm->live_nodes_map, O2NM_MAX_NODES); spin_lock(&dlm->spinlock); - memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); - + bitmap_copy(ctxt->live_map, dlm->live_nodes_map, O2NM_MAX_NODES); __dlm_set_joining_node(dlm, dlm->node_num); - spin_unlock(&dlm->spinlock); node = -1; @@ -1643,8 +1641,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) * yes_resp_map. Copy that into our domain map and send a join * assert message to clean up everyone elses state. 
*/ spin_lock(&dlm->spinlock); - memcpy(dlm->domain_map, ctxt->yes_resp_map, - sizeof(ctxt->yes_resp_map)); + bitmap_copy(dlm->domain_map, ctxt->yes_resp_map, O2NM_MAX_NODES); set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); @@ -2009,9 +2006,9 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n", dlm->recovery_map, &(dlm->recovery_map[0])); - memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map)); - memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map)); - memset(dlm->domain_map, 0, sizeof(dlm->domain_map)); + bitmap_zero(dlm->recovery_map, O2NM_MAX_NODES); + bitmap_zero(dlm->live_nodes_map, O2NM_MAX_NODES); + bitmap_zero(dlm->domain_map, O2NM_MAX_NODES); dlm->dlm_thread_task = NULL; dlm->dlm_reco_thread_task = NULL; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 227da5b1b6ab..d610da8e2f24 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -258,12 +258,12 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, mle->type = type; INIT_HLIST_NODE(&mle->master_hash_node); INIT_LIST_HEAD(&mle->hb_events); - memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); + bitmap_zero(mle->maybe_map, O2NM_MAX_NODES); spin_lock_init(&mle->spinlock); init_waitqueue_head(&mle->wq); atomic_set(&mle->woken, 0); kref_init(&mle->mle_refs); - memset(mle->response_map, 0, sizeof(mle->response_map)); + bitmap_zero(mle->response_map, O2NM_MAX_NODES); mle->master = O2NM_MAX_NODES; mle->new_master = O2NM_MAX_NODES; mle->inuse = 0; @@ -290,8 +290,8 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, atomic_inc(&dlm->mle_cur_count[mle->type]); /* copy off the node_map and register hb callbacks on our copy */ - memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map)); - memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map)); + bitmap_copy(mle->node_map, dlm->domain_map, O2NM_MAX_NODES); + bitmap_copy(mle->vote_map, dlm->domain_map, 
O2NM_MAX_NODES); clear_bit(dlm->node_num, mle->vote_map); clear_bit(dlm->node_num, mle->node_map); @@ -572,7 +572,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, spin_unlock(&dlm->track_lock); memset(res->lvb, 0, DLM_LVB_LEN); - memset(res->refmap, 0, sizeof(res->refmap)); + bitmap_zero(res->refmap, O2NM_MAX_NODES); } struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, @@ -1036,10 +1036,10 @@ recheck: spin_lock(&mle->spinlock); m = mle->master; - map_changed = (memcmp(mle->vote_map, mle->node_map, - sizeof(mle->vote_map)) != 0); - voting_done = (memcmp(mle->vote_map, mle->response_map, - sizeof(mle->vote_map)) == 0); + map_changed = !bitmap_equal(mle->vote_map, mle->node_map, + O2NM_MAX_NODES); + voting_done = bitmap_equal(mle->vote_map, mle->response_map, + O2NM_MAX_NODES); /* restart if we hit any errors */ if (map_changed) { @@ -1277,11 +1277,11 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, /* now blank out everything, as if we had never * contacted anyone */ - memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); - memset(mle->response_map, 0, sizeof(mle->response_map)); + bitmap_zero(mle->maybe_map, O2NM_MAX_NODES); + bitmap_zero(mle->response_map, O2NM_MAX_NODES); /* reset the vote_map to the current node_map */ - memcpy(mle->vote_map, mle->node_map, - sizeof(mle->node_map)); + bitmap_copy(mle->vote_map, mle->node_map, + O2NM_MAX_NODES); /* put myself into the maybe map */ if (mle->type != DLM_MLE_BLOCK) set_bit(dlm->node_num, mle->maybe_map); @@ -2094,7 +2094,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) flags = item->u.am.flags; spin_lock(&dlm->spinlock); - memcpy(nodemap, dlm->domain_map, sizeof(nodemap)); + bitmap_copy(nodemap, dlm->domain_map, O2NM_MAX_NODES); spin_unlock(&dlm->spinlock); clear_bit(dlm->node_num, nodemap); @@ -3447,7 +3447,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, ret = 0; } - memset(iter.node_map, 0, sizeof(iter.node_map)); + 
bitmap_zero(iter.node_map, O2NM_MAX_NODES); set_bit(old_master, iter.node_map); mlog(0, "doing assert master of %.*s back to %u\n", res->lockname.len, res->lockname.name, old_master); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 52ad342fec3e..50da8af988c1 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -733,7 +733,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) struct dlm_reco_node_data *ndata; spin_lock(&dlm->spinlock); - memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map)); + bitmap_copy(dlm->reco.node_map, dlm->domain_map, O2NM_MAX_NODES); /* nodes can only be removed (by dying) after dropping * this lock, and death will be trapped later, so this should do */ spin_unlock(&dlm->spinlock); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4d78e0979517..5c60b6bc85bf 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2712,7 +2712,7 @@ const struct inode_operations ocfs2_file_iops = { .permission = ocfs2_permission, .listxattr = ocfs2_listxattr, .fiemap = ocfs2_fiemap, - .get_acl = ocfs2_iop_get_acl, + .get_inode_acl = ocfs2_iop_get_acl, .set_acl = ocfs2_iop_set_acl, .fileattr_get = ocfs2_fileattr_get, .fileattr_set = ocfs2_fileattr_set, @@ -2722,7 +2722,7 @@ const struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission, - .get_acl = ocfs2_iop_get_acl, + .get_inode_acl = ocfs2_iop_get_acl, .set_acl = ocfs2_iop_set_acl, }; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 126671e6caed..3fb98b4569a2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -157,7 +157,7 @@ static void ocfs2_queue_replay_slots(struct ocfs2_super *osb, replay_map->rm_state = REPLAY_DONE; } -static void ocfs2_free_replay_slots(struct ocfs2_super *osb) +void ocfs2_free_replay_slots(struct ocfs2_super *osb) { struct ocfs2_replay_map *replay_map = osb->replay_map; diff --git a/fs/ocfs2/journal.h 
b/fs/ocfs2/journal.h index 969d0aa28718..41c382f68529 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -150,6 +150,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); +void ocfs2_free_replay_slots(struct ocfs2_super *osb); /* * Journal Control: * Initialize, Load, Shutdown, Wipe a journal. diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 961d1cf54388..a8fd51afb794 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -232,6 +232,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, handle_t *handle = NULL; struct ocfs2_super *osb; struct ocfs2_dinode *dirfe; + struct ocfs2_dinode *fe = NULL; struct buffer_head *new_fe_bh = NULL; struct inode *inode = NULL; struct ocfs2_alloc_context *inode_ac = NULL; @@ -382,6 +383,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, goto leave; } + fe = (struct ocfs2_dinode *) new_fe_bh->b_data; if (S_ISDIR(mode)) { status = ocfs2_fill_new_dir(osb, handle, dir, inode, new_fe_bh, data_ac, meta_ac); @@ -454,8 +456,11 @@ roll_back: leave: if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && fe) + ocfs2_set_links_count(fe, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) @@ -632,18 +637,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, return status; } - status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, + return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, parent_fe_bh, handle, inode_ac, fe_blkno, suballoc_loc, suballoc_bit); - if (status < 0) { - u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); - int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, - inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); - if (tmp) - mlog_errno(tmp); - } - - return status; } static int ocfs2_mkdir(struct user_namespace *mnt_userns, @@ -2028,8 +2024,11 @@ bail: 
ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && fe) + ocfs2_set_links_count(fe, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) @@ -2916,7 +2915,7 @@ const struct inode_operations ocfs2_dir_iops = { .permission = ocfs2_permission, .listxattr = ocfs2_listxattr, .fiemap = ocfs2_fiemap, - .get_acl = ocfs2_iop_get_acl, + .get_inode_acl = ocfs2_iop_get_acl, .set_acl = ocfs2_iop_set_acl, .fileattr_get = ocfs2_fileattr_get, .fileattr_set = ocfs2_fileattr_set, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 740b64238312..a503c553bab2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -560,8 +560,7 @@ static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) u32 nlink = le16_to_cpu(di->i_links_count); u32 hi = le16_to_cpu(di->i_links_count_hi); - if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) - nlink |= (hi << OCFS2_LINKS_HI_SHIFT); + nlink |= (hi << OCFS2_LINKS_HI_SHIFT); return nlink; } diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 88f75f7f02d7..c973c03f6fd8 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -273,17 +273,17 @@ static int o2cb_cluster_check(void) */ #define O2CB_MAP_STABILIZE_COUNT 60 for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) { - o2hb_fill_node_map(hbmap, sizeof(hbmap)); + o2hb_fill_node_map(hbmap, O2NM_MAX_NODES); if (!test_bit(node_num, hbmap)) { printk(KERN_ERR "o2cb: %s heartbeat has not been " "started.\n", (o2hb_global_heartbeat_active() ? 
"Global" : "Local")); return -EINVAL; } - o2net_fill_node_map(netmap, sizeof(netmap)); + o2net_fill_node_map(netmap, O2NM_MAX_NODES); /* Force set the current node to allow easy compare */ set_bit(node_num, netmap); - if (!memcmp(hbmap, netmap, sizeof(hbmap))) + if (bitmap_equal(hbmap, netmap, O2NM_MAX_NODES)) return 0; if (i < O2CB_MAP_STABILIZE_COUNT - 1) msleep(1000); diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 317126261523..a8d5ca98fa57 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -669,6 +669,8 @@ static struct ctl_table_header *ocfs2_table_header; static int __init ocfs2_stack_glue_init(void) { + int ret; + strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table); @@ -678,7 +680,11 @@ static int __init ocfs2_stack_glue_init(void) return -ENOMEM; /* or something. */ } - return ocfs2_sysfs_init(); + ret = ocfs2_sysfs_init(); + if (ret) + unregister_sysctl_table(ocfs2_table_header); + + return ret; } static void __exit ocfs2_stack_glue_exit(void) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 42c993e53924..0b0e6a132101 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1159,6 +1159,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) out_dismount: atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); + ocfs2_free_replay_slots(osb); ocfs2_dismount_volume(sb, 1); goto out; @@ -1822,12 +1823,14 @@ static int ocfs2_mount_volume(struct super_block *sb) status = ocfs2_truncate_log_init(osb); if (status < 0) { mlog_errno(status); - goto out_system_inodes; + goto out_check_volume; } ocfs2_super_unlock(osb, 1); return 0; +out_check_volume: + ocfs2_free_replay_slots(osb); out_system_inodes: if (osb->local_alloc_state == OCFS2_LA_ENABLED) ocfs2_shutdown_local_alloc(osb); diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 605e5a3506ec..c5da2091cefb 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c 
@@ -64,8 +64,7 @@ struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu) return acl; } -static int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, - int type) +int __orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int error = 0; void *value = NULL; @@ -119,12 +118,13 @@ out: return error; } -int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int orangefs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int error; struct iattr iattr; int rc; + struct inode *inode = d_inode(dentry); memset(&iattr, 0, sizeof iattr); @@ -153,46 +153,7 @@ int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, rc = __orangefs_set_acl(inode, acl, type); if (!rc && (iattr.ia_valid == ATTR_MODE)) - rc = __orangefs_setattr(inode, &iattr); + rc = __orangefs_setattr_mode(dentry, &iattr); return rc; } - -int orangefs_init_acl(struct inode *inode, struct inode *dir) -{ - struct posix_acl *default_acl, *acl; - umode_t mode = inode->i_mode; - struct iattr iattr; - int error = 0; - - error = posix_acl_create(dir, &mode, &default_acl, &acl); - if (error) - return error; - - if (default_acl) { - error = __orangefs_set_acl(inode, default_acl, - ACL_TYPE_DEFAULT); - posix_acl_release(default_acl); - } else { - inode->i_default_acl = NULL; - } - - if (acl) { - if (!error) - error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); - posix_acl_release(acl); - } else { - inode->i_acl = NULL; - } - - /* If mode of the inode was changed, then do a forcible ->setattr */ - if (mode != inode->i_mode) { - memset(&iattr, 0, sizeof iattr); - inode->i_mode = mode; - iattr.ia_mode = mode; - iattr.ia_valid |= ATTR_MODE; - __orangefs_setattr(inode, &iattr); - } - - return error; -} diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 7a8c0c6e698d..370bd3bbf5e4 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -53,7 +53,7 @@ static int 
orangefs_writepage_locked(struct page *page, bv.bv_len = wlen; bv.bv_offset = off % PAGE_SIZE; WARN_ON(wlen == 0); - iov_iter_bvec(&iter, WRITE, &bv, 1, wlen); + iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, wlen); ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen, len, wr, NULL, NULL); @@ -112,7 +112,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow, else ow->bv[i].bv_offset = 0; } - iov_iter_bvec(&iter, WRITE, ow->bv, ow->npages, ow->len); + iov_iter_bvec(&iter, ITER_SOURCE, ow->bv, ow->npages, ow->len); WARN_ON(ow->off >= len); if (ow->off + ow->len > len) @@ -270,7 +270,7 @@ static void orangefs_readahead(struct readahead_control *rac) offset = readahead_pos(rac); i_pages = &rac->mapping->i_pages; - iov_iter_xarray(&iter, READ, i_pages, offset, readahead_length(rac)); + iov_iter_xarray(&iter, ITER_DEST, i_pages, offset, readahead_length(rac)); /* read in the pages. */ if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, @@ -303,7 +303,7 @@ static int orangefs_read_folio(struct file *file, struct folio *folio) bv.bv_page = &folio->page; bv.bv_len = folio_size(folio); bv.bv_offset = 0; - iov_iter_bvec(&iter, READ, &bv, 1, folio_size(folio)); + iov_iter_bvec(&iter, ITER_DEST, &bv, 1, folio_size(folio)); ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, folio_size(folio), inode->i_size, NULL, NULL, file); @@ -828,15 +828,23 @@ again: spin_unlock(&inode->i_lock); mark_inode_dirty(inode); - if (iattr->ia_valid & ATTR_MODE) - /* change mod on a file that has ACLs */ - ret = posix_acl_chmod(&init_user_ns, inode, inode->i_mode); - ret = 0; out: return ret; } +int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr) +{ + int ret; + struct inode *inode = d_inode(dentry); + + ret = __orangefs_setattr(inode, iattr); + /* change mode on a file that has ACLs */ + if (!ret && (iattr->ia_valid & ATTR_MODE)) + ret = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); + return ret; +} + /* * Change attributes 
of an object referenced by dentry. */ @@ -849,7 +857,7 @@ int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ret = setattr_prepare(&init_user_ns, dentry, iattr); if (ret) goto out; - ret = __orangefs_setattr(d_inode(dentry), iattr); + ret = __orangefs_setattr_mode(dentry, iattr); sync_inode_metadata(d_inode(dentry), 1); out: gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", @@ -967,7 +975,7 @@ static int orangefs_fileattr_set(struct user_namespace *mnt_userns, /* ORANGEFS2 implementation of VFS inode operations for files */ static const struct inode_operations orangefs_file_inode_operations = { - .get_acl = orangefs_get_acl, + .get_inode_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, .setattr = orangefs_setattr, .getattr = orangefs_getattr, @@ -1097,8 +1105,9 @@ struct inode *orangefs_iget(struct super_block *sb, * Allocate an inode for a newly created file and insert it into the inode hash. */ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, - int mode, dev_t dev, struct orangefs_object_kref *ref) + umode_t mode, dev_t dev, struct orangefs_object_kref *ref) { + struct posix_acl *acl = NULL, *default_acl = NULL; unsigned long hash = orangefs_handle_hash(ref); struct inode *inode; int error; @@ -1115,6 +1124,10 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, if (!inode) return ERR_PTR(-ENOMEM); + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + goto out_iput; + orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ @@ -1125,6 +1138,19 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_init_iops(inode); inode->i_rdev = dev; + if (default_acl) { + error = __orangefs_set_acl(inode, default_acl, + ACL_TYPE_DEFAULT); + if (error) + goto out_iput; + } + + if (acl) { + error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); + if (error) + goto out_iput; + } + error = 
insert_inode_locked4(inode, hash, orangefs_test_inode, ref); if (error < 0) goto out_iput; @@ -1132,10 +1158,22 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, gossip_debug(GOSSIP_INODE_DEBUG, "Initializing ACL's for inode %pU\n", get_khandle_from_ino(inode)); - orangefs_init_acl(inode, dir); + if (mode != inode->i_mode) { + struct iattr iattr = { + .ia_mode = mode, + .ia_valid = ATTR_MODE, + }; + inode->i_mode = mode; + __orangefs_setattr(inode, &iattr); + __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + } + posix_acl_release(acl); + posix_acl_release(default_acl); return inode; out_iput: iput(inode); + posix_acl_release(acl); + posix_acl_release(default_acl); return ERR_PTR(error); } diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 600e8eee541f..75c1a3dcf68c 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -430,7 +430,7 @@ static int orangefs_rename(struct user_namespace *mnt_userns, /* ORANGEFS implementation of VFS inode operations for directories */ const struct inode_operations orangefs_dir_inode_operations = { .lookup = orangefs_lookup, - .get_acl = orangefs_get_acl, + .get_inode_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, .create = orangefs_create, .unlink = orangefs_unlink, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index b5940ec1836a..6e0cc01b3a14 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -103,13 +103,13 @@ enum orangefs_vfs_op_states { #define ORANGEFS_CACHE_CREATE_FLAGS 0 #endif -extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu); extern int orangefs_set_acl(struct user_namespace *mnt_userns, - struct inode *inode, struct posix_acl *acl, + struct dentry *dentry, struct posix_acl *acl, int type); +int __orangefs_set_acl(struct inode *inode, struct 
posix_acl *acl, int type); /* * orangefs data structures @@ -356,11 +356,12 @@ void fsid_key_table_finalize(void); vm_fault_t orangefs_page_mkwrite(struct vm_fault *); struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, - int mode, + umode_t mode, dev_t dev, struct orangefs_object_kref *ref); int __orangefs_setattr(struct inode *, struct iattr *); +int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr); int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *); int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f436d8847f08..6e4e65ee050d 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -44,6 +44,35 @@ static bool ovl_must_copy_xattr(const char *name) !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } +static int ovl_copy_acl(struct ovl_fs *ofs, const struct path *path, + struct dentry *dentry, const char *acl_name) +{ + int err; + struct posix_acl *clone, *real_acl = NULL; + + real_acl = ovl_get_acl_path(path, acl_name, false); + if (!real_acl) + return 0; + + if (IS_ERR(real_acl)) { + err = PTR_ERR(real_acl); + if (err == -ENODATA || err == -EOPNOTSUPP) + return 0; + return err; + } + + clone = posix_acl_clone(real_acl, GFP_KERNEL); + posix_acl_release(real_acl); /* release original acl */ + if (!clone) + return -ENOMEM; + + err = ovl_do_set_acl(ofs, dentry, acl_name, clone); + + /* release cloned acl */ + posix_acl_release(clone); + return err; +} + int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new) { struct dentry *old = oldpath->dentry; @@ -93,6 +122,15 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct de error = 0; continue; /* Discard */ } + + if (is_posix_acl_xattr(name)) { + error = ovl_copy_acl(OVL_FS(sb), oldpath, new, name); + if (!error) + continue; + /* POSIX ACLs must be copied. 
*/ + break; + } + retry: size = ovl_do_getxattr(oldpath, name, value, value_size); if (size == -ERANGE) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6b03457f72bb..cbb569d5d234 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -435,28 +435,12 @@ out: } static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry, - const char *name, const struct posix_acl *acl) + const char *acl_name, struct posix_acl *acl) { - void *buffer; - size_t size; - int err; - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl) return 0; - size = posix_acl_xattr_size(acl->a_count); - buffer = kmalloc(size, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - if (err < 0) - goto out_free; - - err = ovl_do_setxattr(ofs, upperdentry, name, buffer, size, XATTR_CREATE); -out_free: - kfree(buffer); - return err; + return ovl_do_set_acl(ofs, upperdentry, acl_name, acl); } static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, @@ -1311,7 +1295,9 @@ const struct inode_operations ovl_dir_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, + .get_inode_acl = ovl_get_inode_acl, .get_acl = ovl_get_acl, + .set_acl = ovl_set_acl, .update_time = ovl_update_time, .fileattr_get = ovl_fileattr_get, .fileattr_set = ovl_fileattr_set, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 9e61511de7a7..ee6dfa577c93 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -14,6 +14,8 @@ #include <linux/fileattr.h> #include <linux/security.h> #include <linux/namei.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> #include "overlayfs.h" @@ -460,7 +462,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) * of the POSIX ACLs retrieved from the lower layer to this function to not * alter the POSIX ACLs for the underlying filesystem. 
*/ -static void ovl_idmap_posix_acl(struct inode *realinode, +static void ovl_idmap_posix_acl(const struct inode *realinode, struct user_namespace *mnt_userns, struct posix_acl *acl) { @@ -485,6 +487,64 @@ static void ovl_idmap_posix_acl(struct inode *realinode, } /* + * The @noperm argument is used to skip permission checking and is a temporary + * measure. Quoting Miklos from an earlier discussion: + * + * > So there are two paths to getting an acl: + * > 1) permission checking and 2) retrieving the value via getxattr(2). + * > This is a similar situation as reading a symlink vs. following it. + * > When following a symlink overlayfs always reads the link on the + * > underlying fs just as if it was a readlink(2) call, calling + * > security_inode_readlink() instead of security_inode_follow_link(). + * > This is logical: we are reading the link from the underlying storage, + * > and following it on overlayfs. + * > + * > Applying the same logic to acl: we do need to call the + * > security_inode_getxattr() on the underlying fs, even if just want to + * > check permissions on overlay. This is currently not done, which is an + * > inconsistency. + * > + * > Maybe adding the check to ovl_get_acl() is the right way to go, but + * > I'm a little afraid of a performance regression. Will look into that. + * + * Until we have made a decision allow this helper to take the @noperm + * argument. We should hopefully be able to remove it soon. 
+ */ +struct posix_acl *ovl_get_acl_path(const struct path *path, + const char *acl_name, bool noperm) +{ + struct posix_acl *real_acl, *clone; + struct user_namespace *mnt_userns; + struct inode *realinode = d_inode(path->dentry); + + mnt_userns = mnt_user_ns(path->mnt); + + if (noperm) + real_acl = get_inode_acl(realinode, posix_acl_type(acl_name)); + else + real_acl = vfs_get_acl(mnt_userns, path->dentry, acl_name); + if (IS_ERR_OR_NULL(real_acl)) + return real_acl; + + if (!is_idmapped_mnt(path->mnt)) + return real_acl; + + /* + * We cannot alter the ACLs returned from the relevant layer as that + * would alter the cached values filesystem wide for the lower + * filesystem. Instead we can clone the ACLs and then apply the + * relevant idmapping of the layer. + */ + clone = posix_acl_clone(real_acl, GFP_KERNEL); + posix_acl_release(real_acl); /* release original acl */ + if (!clone) + return ERR_PTR(-ENOMEM); + + ovl_idmap_posix_acl(realinode, mnt_userns, clone); + return clone; +} + +/* * When the relevant layer is an idmapped mount we need to take the idmapping * of the layer into account and translate any ACL_{GROUP,USER} values * according to the idmapped mount. @@ -495,10 +555,12 @@ static void ovl_idmap_posix_acl(struct inode *realinode, * * This is obviously only relevant when idmapped layers are used. */ -struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) +struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, + bool rcu, bool noperm) { struct inode *realinode = ovl_inode_real(inode); - struct posix_acl *acl, *clone; + struct posix_acl *acl; struct path realpath; if (!IS_POSIXACL(realinode)) @@ -512,40 +574,115 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) } if (rcu) { + /* + * If the layer is idmapped drop out of RCU path walk + * so we can clone the ACLs. 
+ */ + if (is_idmapped_mnt(realpath.mnt)) + return ERR_PTR(-ECHILD); + acl = get_cached_acl_rcu(realinode, type); } else { const struct cred *old_cred; old_cred = ovl_override_creds(inode->i_sb); - acl = get_acl(realinode, type); + acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm); revert_creds(old_cred); } - /* - * If there are no POSIX ACLs, or we encountered an error, - * or the layer isn't idmapped we don't need to do anything. - */ - if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl)) - return acl; + + return acl; +} + +static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, + struct posix_acl *acl, int type) +{ + int err; + struct path realpath; + const char *acl_name; + const struct cred *old_cred; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct dentry *upperdentry = ovl_dentry_upper(dentry); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); + + err = ovl_want_write(dentry); + if (err) + return err; /* - * We only get here if the layer is idmapped. So drop out of RCU path - * walk so we can clone the ACLs. There's no need to release the ACLs - * since get_cached_acl_rcu() doesn't take a reference on the ACLs. + * If ACL is to be removed from a lower file, check if it exists in + * the first place before copying it up. 
*/ - if (rcu) - return ERR_PTR(-ECHILD); + acl_name = posix_acl_xattr_name(type); + if (!acl && !upperdentry) { + struct posix_acl *real_acl; - clone = posix_acl_clone(acl, GFP_KERNEL); - if (!clone) - clone = ERR_PTR(-ENOMEM); + ovl_path_lower(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + real_acl = vfs_get_acl(mnt_user_ns(realpath.mnt), realdentry, + acl_name); + revert_creds(old_cred); + if (IS_ERR(real_acl)) { + err = PTR_ERR(real_acl); + goto out_drop_write; + } + posix_acl_release(real_acl); + } + + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; + + realdentry = ovl_dentry_upper(dentry); + } + + old_cred = ovl_override_creds(dentry->d_sb); + if (acl) + err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); else - ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone); + err = ovl_do_remove_acl(ofs, realdentry, acl_name); + revert_creds(old_cred); + + /* copy c/mtime */ + ovl_copyattr(inode); + +out_drop_write: + ovl_drop_write(dentry); + return err; +} + +int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + struct posix_acl *acl, int type) +{ + int err; + struct inode *inode = d_inode(dentry); + struct dentry *workdir = ovl_workdir(dentry); + struct inode *realinode = ovl_inode_real(inode); + + if (!IS_POSIXACL(d_inode(workdir))) + return -EOPNOTSUPP; + if (!realinode->i_op->set_acl) + return -EOPNOTSUPP; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + if (!inode_owner_or_capable(&init_user_ns, inode)) + return -EPERM; + /* - * Since we're not in RCU path walk we always need to release the - * original ACLs. + * Check if sgid bit needs to be cleared (actual setacl operation will + * be done with mounter's capabilities and so that won't do it for us). 
*/ - posix_acl_release(acl); - return clone; + if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) { + struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; + + err = ovl_setattr(&init_user_ns, dentry, &iattr); + if (err) + return err; + } + + return ovl_set_or_remove_acl(dentry, inode, acl, type); } #endif @@ -721,7 +858,9 @@ static const struct inode_operations ovl_file_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, + .get_inode_acl = ovl_get_inode_acl, .get_acl = ovl_get_acl, + .set_acl = ovl_set_acl, .update_time = ovl_update_time, .fiemap = ovl_fiemap, .fileattr_get = ovl_fileattr_get, @@ -741,7 +880,9 @@ static const struct inode_operations ovl_special_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, + .get_inode_acl = ovl_get_inode_acl, .get_acl = ovl_get_acl, + .set_acl = ovl_set_acl, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index eee8f08d32b6..480e6aabef27 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -8,6 +8,8 @@ #include <linux/uuid.h> #include <linux/fs.h> #include <linux/namei.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> #include "ovl_entry.h" #undef pr_fmt @@ -278,6 +280,18 @@ static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry, return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox)); } +static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry, + const char *acl_name, struct posix_acl *acl) +{ + return vfs_set_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name, acl); +} + +static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, + const char *acl_name) +{ + return vfs_remove_acl(ovl_upper_mnt_userns(ofs), dentry, acl_name); +} + static inline int ovl_do_rename(struct 
ovl_fs *ofs, struct inode *olddir, struct dentry *olddentry, struct inode *newdir, struct dentry *newdentry, unsigned int flags) @@ -594,9 +608,33 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); #ifdef CONFIG_FS_POSIX_ACL -struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *do_ovl_get_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, + bool rcu, bool noperm); +static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, + bool rcu) +{ + return do_ovl_get_acl(&init_user_ns, inode, type, rcu, true); +} +static inline struct posix_acl *ovl_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) +{ + return do_ovl_get_acl(mnt_userns, d_inode(dentry), type, false, false); +} +int ovl_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + struct posix_acl *acl, int type); +struct posix_acl *ovl_get_acl_path(const struct path *path, + const char *acl_name, bool noperm); #else -#define ovl_get_acl NULL +#define ovl_get_inode_acl NULL +#define ovl_get_acl NULL +#define ovl_set_acl NULL +static inline struct posix_acl *ovl_get_acl_path(const struct path *path, + const char *acl_name, + bool noperm) +{ + return NULL; +} #endif int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index a29a8afe9b26..2addafe4e14a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -813,13 +813,11 @@ retry: * allowed as upper are limited to "normal" ones, where checking * for the above two errors is sufficient. 
*/ - err = ovl_do_removexattr(ofs, work, - XATTR_NAME_POSIX_ACL_DEFAULT); + err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; - err = ovl_do_removexattr(ofs, work, - XATTR_NAME_POSIX_ACL_ACCESS); + err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; @@ -1001,83 +999,6 @@ static unsigned int ovl_split_lowerdirs(char *str) return ctr; } -static int __maybe_unused -ovl_posix_acl_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return ovl_xattr_get(dentry, inode, handler->name, buffer, size); -} - -static int __maybe_unused -ovl_posix_acl_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - struct dentry *workdir = ovl_workdir(dentry); - struct inode *realinode = ovl_inode_real(inode); - struct posix_acl *acl = NULL; - int err; - - /* Check that everything is OK before copy-up */ - if (value) { - /* The above comment can be understood in two ways: - * - * 1. We just want to check whether the basic POSIX ACL format - * is ok. For example, if the header is correct and the size - * is sane. - * 2. We want to know whether the ACL_{GROUP,USER} entries can - * be mapped according to the underlying filesystem. - * - * Currently, we only check 1. If we wanted to check 2. we - * would need to pass the mnt_userns and the fs_userns of the - * underlying filesystem. But frankly, I think checking 1. is - * enough to start the copy-up. 
- */ - acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - err = -EOPNOTSUPP; - if (!IS_POSIXACL(d_inode(workdir))) - goto out_acl_release; - if (!realinode->i_op->set_acl) - goto out_acl_release; - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { - err = acl ? -EACCES : 0; - goto out_acl_release; - } - err = -EPERM; - if (!inode_owner_or_capable(&init_user_ns, inode)) - goto out_acl_release; - - posix_acl_release(acl); - - /* - * Check if sgid bit needs to be cleared (actual setacl operation will - * be done with mounter's capabilities and so that won't do it for us). - */ - if (unlikely(inode->i_mode & S_ISGID) && - handler->flags == ACL_TYPE_ACCESS && - !in_group_p(inode->i_gid) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) { - struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; - - err = ovl_setattr(&init_user_ns, dentry, &iattr); - if (err) - return err; - } - - err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); - return err; - -out_acl_release: - posix_acl_release(acl); - return err; -} - static int ovl_own_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) @@ -1110,22 +1031,6 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler, return ovl_xattr_set(dentry, inode, name, value, size, flags); } -static const struct xattr_handler __maybe_unused -ovl_posix_acl_access_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = ACL_TYPE_ACCESS, - .get = ovl_posix_acl_xattr_get, - .set = ovl_posix_acl_xattr_set, -}; - -static const struct xattr_handler __maybe_unused -ovl_posix_acl_default_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = ovl_posix_acl_xattr_get, - .set = ovl_posix_acl_xattr_set, -}; - static const struct xattr_handler ovl_own_trusted_xattr_handler = { .prefix = 
OVL_XATTR_TRUSTED_PREFIX, .get = ovl_own_xattr_get, @@ -1146,8 +1051,8 @@ static const struct xattr_handler ovl_other_xattr_handler = { static const struct xattr_handler *ovl_trusted_xattr_handlers[] = { #ifdef CONFIG_FS_POSIX_ACL - &ovl_posix_acl_access_xattr_handler, - &ovl_posix_acl_default_xattr_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, #endif &ovl_own_trusted_xattr_handler, &ovl_other_xattr_handler, @@ -1156,8 +1061,8 @@ static const struct xattr_handler *ovl_trusted_xattr_handlers[] = { static const struct xattr_handler *ovl_user_xattr_handlers[] = { #ifdef CONFIG_FS_POSIX_ACL - &ovl_posix_acl_access_xattr_handler, - &ovl_posix_acl_default_xattr_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, #endif &ovl_own_user_xattr_handler, &ovl_other_xattr_handler, diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 74dc0f571dc9..e6643db35cce 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -25,6 +25,11 @@ #include <linux/namei.h> #include <linux/mnt_idmapping.h> #include <linux/iversion.h> +#include <linux/security.h> +#include <linux/evm.h> +#include <linux/fsnotify.h> + +#include "internal.h" static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -64,7 +69,7 @@ struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) if (acl == ACL_DONT_CACHE) { struct posix_acl *ret; - ret = inode->i_op->get_acl(inode, type, LOOKUP_RCU); + ret = inode->i_op->get_inode_acl(inode, type, LOOKUP_RCU); if (!IS_ERR(ret)) acl = ret; } @@ -106,15 +111,17 @@ void forget_all_cached_acls(struct inode *inode) } EXPORT_SYMBOL(forget_all_cached_acls); -struct posix_acl *get_acl(struct inode *inode, int type) +static struct posix_acl *__get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, struct inode *inode, + int type) { - void *sentinel; + struct posix_acl *sentinel; struct posix_acl **p; struct posix_acl *acl; /* * The sentinel is used to detect when another operation like - * 
set_cached_acl() or forget_cached_acl() races with get_acl(). + * set_cached_acl() or forget_cached_acl() races with get_inode_acl(). * It is guaranteed that is_uncached_acl(sentinel) is true. */ @@ -133,25 +140,27 @@ struct posix_acl *get_acl(struct inode *inode, int type) * current value of the ACL will not be ACL_NOT_CACHED and so our own * sentinel will not be set; another task will update the cache. We * could wait for that other task to complete its job, but it's easier - * to just call ->get_acl to fetch the ACL ourself. (This is going to - * be an unlikely race.) + * to just call ->get_inode_acl to fetch the ACL ourself. (This is + * going to be an unlikely race.) */ cmpxchg(p, ACL_NOT_CACHED, sentinel); /* - * Normally, the ACL returned by ->get_acl will be cached. + * Normally, the ACL returned by ->get{_inode}_acl will be cached. * A filesystem can prevent that by calling - * forget_cached_acl(inode, type) in ->get_acl. + * forget_cached_acl(inode, type) in ->get{_inode}_acl. * - * If the filesystem doesn't have a get_acl() function at all, we'll - * just create the negative cache entry. + * If the filesystem doesn't have a get{_inode}_ acl() function at all, + * we'll just create the negative cache entry. 
*/ - if (!inode->i_op->get_acl) { + if (dentry && inode->i_op->get_acl) { + acl = inode->i_op->get_acl(mnt_userns, dentry, type); + } else if (inode->i_op->get_inode_acl) { + acl = inode->i_op->get_inode_acl(inode, type, false); + } else { set_cached_acl(inode, type, NULL); return NULL; } - acl = inode->i_op->get_acl(inode, type, false); - if (IS_ERR(acl)) { /* * Remove our sentinel so that we don't block future attempts @@ -169,7 +178,12 @@ struct posix_acl *get_acl(struct inode *inode, int type) posix_acl_release(acl); return acl; } -EXPORT_SYMBOL(get_acl); + +struct posix_acl *get_inode_acl(struct inode *inode, int type) +{ + return __get_acl(&init_user_ns, NULL, inode, type); +} +EXPORT_SYMBOL(get_inode_acl); /* * Init a fresh posix_acl @@ -578,19 +592,20 @@ EXPORT_SYMBOL(__posix_acl_chmod); * posix_acl_chmod - chmod a posix acl * * @mnt_userns: user namespace of the mount @inode was found from - * @inode: inode to check permissions on + * @dentry: dentry to check permissions on * @mode: the new mode of @inode * - * If the inode has been found through an idmapped mount the user namespace of + * If the dentry has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. 
*/ int - posix_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode, + posix_acl_chmod(struct user_namespace *mnt_userns, struct dentry *dentry, umode_t mode) { + struct inode *inode = d_inode(dentry); struct posix_acl *acl; int ret = 0; @@ -599,7 +614,7 @@ int if (!inode->i_op->set_acl) return -EOPNOTSUPP; - acl = get_acl(inode, ACL_TYPE_ACCESS); + acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR_OR_NULL(acl)) { if (acl == ERR_PTR(-EOPNOTSUPP)) return 0; @@ -609,7 +624,7 @@ int ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) return ret; - ret = inode->i_op->set_acl(mnt_userns, inode, acl, ACL_TYPE_ACCESS); + ret = inode->i_op->set_acl(mnt_userns, dentry, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); return ret; } @@ -629,7 +644,7 @@ posix_acl_create(struct inode *dir, umode_t *mode, if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) return 0; - p = get_acl(dir, ACL_TYPE_DEFAULT); + p = get_inode_acl(dir, ACL_TYPE_DEFAULT); if (!p || p == ERR_PTR(-EOPNOTSUPP)) { *mode &= ~current_umask(); return 0; @@ -732,118 +747,32 @@ static int posix_acl_fix_xattr_common(const void *value, size_t size) return count; } -void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size) -{ - struct posix_acl_xattr_header *header = value; - struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; - struct user_namespace *fs_userns = i_user_ns(inode); - int count; - vfsuid_t vfsuid; - vfsgid_t vfsgid; - kuid_t uid; - kgid_t gid; - - if (no_idmapping(mnt_userns, i_user_ns(inode))) - return; - - count = posix_acl_fix_xattr_common(value, size); - if (count <= 0) - return; - - for (end = entry + count; entry != end; entry++) { - switch (le16_to_cpu(entry->e_tag)) { - case ACL_USER: - uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsuid = make_vfsuid(mnt_userns, fs_userns, uid); - entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, - vfsuid_into_kuid(vfsuid))); - break; - case 
ACL_GROUP: - gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsgid = make_vfsgid(mnt_userns, fs_userns, gid); - entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, - vfsgid_into_kgid(vfsgid))); - break; - default: - break; - } - } -} - -static void posix_acl_fix_xattr_userns( - struct user_namespace *to, struct user_namespace *from, - void *value, size_t size) -{ - struct posix_acl_xattr_header *header = value; - struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; - int count; - kuid_t uid; - kgid_t gid; - - count = posix_acl_fix_xattr_common(value, size); - if (count <= 0) - return; - - for (end = entry + count; entry != end; entry++) { - switch(le16_to_cpu(entry->e_tag)) { - case ACL_USER: - uid = make_kuid(from, le32_to_cpu(entry->e_id)); - entry->e_id = cpu_to_le32(from_kuid(to, uid)); - break; - case ACL_GROUP: - gid = make_kgid(from, le32_to_cpu(entry->e_id)); - entry->e_id = cpu_to_le32(from_kgid(to, gid)); - break; - default: - break; - } - } -} - -void posix_acl_fix_xattr_from_user(void *value, size_t size) -{ - struct user_namespace *user_ns = current_user_ns(); - if (user_ns == &init_user_ns) - return; - posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size); -} - -void posix_acl_fix_xattr_to_user(void *value, size_t size) -{ - struct user_namespace *user_ns = current_user_ns(); - if (user_ns == &init_user_ns) - return; - posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size); -} - /** - * make_posix_acl - convert POSIX ACLs from uapi to VFS format using the - * provided callbacks to map ACL_{GROUP,USER} entries into the - * appropriate format - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping + * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format + * @userns: the filesystem's idmapping * @value: the uapi representation of POSIX ACLs * @size: the size of @void - * @uid_cb: callback to use for mapping the uid stored in ACL_USER entries - * @gid_cb: callback 
to use for mapping the gid stored in ACL_GROUP entries * - * The make_posix_acl() helper is an abstraction to translate from uapi format - * into the VFS format allowing the caller to specific callbacks to map - * ACL_{GROUP,USER} entries into the expected format. This is used in - * posix_acl_from_xattr() and vfs_set_acl_prepare() and avoids pointless code - * duplication. + * Filesystems that store POSIX ACLs in the unaltered uapi format should use + * posix_acl_from_xattr() when reading them from the backing store and + * converting them into the struct posix_acl VFS format. The helper is + * specifically intended to be called from the acl inode operation. + * + * The posix_acl_from_xattr() function will map the raw {g,u}id values stored + * in ACL_{GROUP,USER} entries into idmapping in @userns. + * + * Note that posix_acl_from_xattr() does not take idmapped mounts into account. + * If it did it calling it from the get acl inode operation would return POSIX + * ACLs mapped according to an idmapped mount which would mean that the value + * couldn't be cached for the filesystem. Idmapped mounts are taken into + * account on the fly during permission checking or right at the VFS - + * userspace boundary before reporting them to the user. * * Return: Allocated struct posix_acl on success, NULL for a valid header but * without actual POSIX ACL entries, or ERR_PTR() encoded error code. 
*/ -static struct posix_acl *make_posix_acl(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, const void *value, size_t size, - kuid_t (*uid_cb)(struct user_namespace *, struct user_namespace *, - const struct posix_acl_xattr_entry *), - kgid_t (*gid_cb)(struct user_namespace *, struct user_namespace *, - const struct posix_acl_xattr_entry *)) +struct posix_acl *posix_acl_from_xattr(struct user_namespace *userns, + const void *value, size_t size) { const struct posix_acl_xattr_header *header = value; const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end; @@ -874,12 +803,14 @@ static struct posix_acl *make_posix_acl(struct user_namespace *mnt_userns, break; case ACL_USER: - acl_e->e_uid = uid_cb(mnt_userns, fs_userns, entry); + acl_e->e_uid = make_kuid(userns, + le32_to_cpu(entry->e_id)); if (!uid_valid(acl_e->e_uid)) goto fail; break; case ACL_GROUP: - acl_e->e_gid = gid_cb(mnt_userns, fs_userns, entry); + acl_e->e_gid = make_kgid(userns, + le32_to_cpu(entry->e_id)); if (!gid_valid(acl_e->e_gid)) goto fail; break; @@ -894,181 +825,6 @@ fail: posix_acl_release(acl); return ERR_PTR(-EINVAL); } - -/** - * vfs_set_acl_prepare_kuid - map ACL_USER uid according to mount- and - * filesystem idmapping - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @e: a ACL_USER entry in POSIX ACL uapi format - * - * The uid stored as ACL_USER entry in @e is a kuid_t stored as a raw {g,u}id - * value. The vfs_set_acl_prepare_kuid() will recover the kuid_t through - * KUIDT_INIT() and then map it according to the idmapped mount. The resulting - * kuid_t is the value which the filesystem can map up into a raw backing store - * id in the filesystem's idmapping. - * - * This is used in vfs_set_acl_prepare() to generate the proper VFS - * representation of POSIX ACLs with ACL_USER entries during setxattr(). - * - * Return: A kuid in @fs_userns for the uid stored in @e. 
- */ -static inline kuid_t -vfs_set_acl_prepare_kuid(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const struct posix_acl_xattr_entry *e) -{ - kuid_t kuid = KUIDT_INIT(le32_to_cpu(e->e_id)); - return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); -} - -/** - * vfs_set_acl_prepare_kgid - map ACL_GROUP gid according to mount- and - * filesystem idmapping - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @e: a ACL_GROUP entry in POSIX ACL uapi format - * - * The gid stored as ACL_GROUP entry in @e is a kgid_t stored as a raw {g,u}id - * value. The vfs_set_acl_prepare_kgid() will recover the kgid_t through - * KGIDT_INIT() and then map it according to the idmapped mount. The resulting - * kgid_t is the value which the filesystem can map up into a raw backing store - * id in the filesystem's idmapping. - * - * This is used in vfs_set_acl_prepare() to generate the proper VFS - * representation of POSIX ACLs with ACL_GROUP entries during setxattr(). - * - * Return: A kgid in @fs_userns for the gid stored in @e. - */ -static inline kgid_t -vfs_set_acl_prepare_kgid(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const struct posix_acl_xattr_entry *e) -{ - kgid_t kgid = KGIDT_INIT(le32_to_cpu(e->e_id)); - return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); -} - -/** - * vfs_set_acl_prepare - convert POSIX ACLs from uapi to VFS format taking - * mount and filesystem idmappings into account - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @value: the uapi representation of POSIX ACLs - * @size: the size of @void - * - * When setting POSIX ACLs with ACL_{GROUP,USER} entries they need to be - * mapped according to the relevant mount- and filesystem idmapping. It is - * important that the ACL_{GROUP,USER} entries in struct posix_acl will be - * mapped into k{g,u}id_t that are supposed to be mapped up in the filesystem - * idmapping. 
This is crucial since the resulting struct posix_acl might be - * cached filesystem wide. The vfs_set_acl_prepare() function will take care to - * perform all necessary idmappings. - * - * Note, that since basically forever the {g,u}id values encoded as - * ACL_{GROUP,USER} entries in the uapi POSIX ACLs passed via @value contain - * values that have been mapped according to the caller's idmapping. In other - * words, POSIX ACLs passed in uapi format as @value during setxattr() contain - * {g,u}id values in their ACL_{GROUP,USER} entries that should actually have - * been stored as k{g,u}id_t. - * - * This means, vfs_set_acl_prepare() needs to first recover the k{g,u}id_t by - * calling K{G,U}IDT_INIT(). Afterwards they can be interpreted as vfs{g,u}id_t - * through from_vfs{g,u}id() to account for any idmapped mounts. The - * vfs_set_acl_prepare_k{g,u}id() helpers will take care to generate the - * correct k{g,u}id_t. - * - * The filesystem will then receive the POSIX ACLs ready to be cached - * filesystem wide and ready to be written to the backing store taking the - * filesystem's idmapping into account. - * - * Return: Allocated struct posix_acl on success, NULL for a valid header but - * without actual POSIX ACL entries, or ERR_PTR() encoded error code. - */ -struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const void *value, size_t size) -{ - return make_posix_acl(mnt_userns, fs_userns, value, size, - vfs_set_acl_prepare_kuid, - vfs_set_acl_prepare_kgid); -} -EXPORT_SYMBOL(vfs_set_acl_prepare); - -/** - * posix_acl_from_xattr_kuid - map ACL_USER uid into filesystem idmapping - * @mnt_userns: unused - * @fs_userns: the filesystem's idmapping - * @e: a ACL_USER entry in POSIX ACL uapi format - * - * Map the uid stored as ACL_USER entry in @e into the filesystem's idmapping. - * This is used in posix_acl_from_xattr() to generate the proper VFS - * representation of POSIX ACLs with ACL_USER entries. 
- * - * Return: A kuid in @fs_userns for the uid stored in @e. - */ -static inline kuid_t -posix_acl_from_xattr_kuid(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const struct posix_acl_xattr_entry *e) -{ - return make_kuid(fs_userns, le32_to_cpu(e->e_id)); -} - -/** - * posix_acl_from_xattr_kgid - map ACL_GROUP gid into filesystem idmapping - * @mnt_userns: unused - * @fs_userns: the filesystem's idmapping - * @e: a ACL_GROUP entry in POSIX ACL uapi format - * - * Map the gid stored as ACL_GROUP entry in @e into the filesystem's idmapping. - * This is used in posix_acl_from_xattr() to generate the proper VFS - * representation of POSIX ACLs with ACL_GROUP entries. - * - * Return: A kgid in @fs_userns for the gid stored in @e. - */ -static inline kgid_t -posix_acl_from_xattr_kgid(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const struct posix_acl_xattr_entry *e) -{ - return make_kgid(fs_userns, le32_to_cpu(e->e_id)); -} - -/** - * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format - * @fs_userns: the filesystem's idmapping - * @value: the uapi representation of POSIX ACLs - * @size: the size of @void - * - * Filesystems that store POSIX ACLs in the unaltered uapi format should use - * posix_acl_from_xattr() when reading them from the backing store and - * converting them into the struct posix_acl VFS format. The helper is - * specifically intended to be called from the ->get_acl() inode operation. - * - * The posix_acl_from_xattr() function will map the raw {g,u}id values stored - * in ACL_{GROUP,USER} entries into the filesystem idmapping in @fs_userns. The - * posix_acl_from_xattr_k{g,u}id() helpers will take care to generate the - * correct k{g,u}id_t. The returned struct posix_acl can be cached. - * - * Note that posix_acl_from_xattr() does not take idmapped mounts into account. 
- * If it did it calling is from the ->get_acl() inode operation would return - * POSIX ACLs mapped according to an idmapped mount which would mean that the - * value couldn't be cached for the filesystem. Idmapped mounts are taken into - * account on the fly during permission checking or right at the VFS - - * userspace boundary before reporting them to the user. - * - * Return: Allocated struct posix_acl on success, NULL for a valid header but - * without actual POSIX ACL entries, or ERR_PTR() encoded error code. - */ -struct posix_acl * -posix_acl_from_xattr(struct user_namespace *fs_userns, - const void *value, size_t size) -{ - return make_posix_acl(&init_user_ns, fs_userns, value, size, - posix_acl_from_xattr_kuid, - posix_acl_from_xattr_kgid); -} EXPORT_SYMBOL (posix_acl_from_xattr); /* @@ -1113,35 +869,74 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, } EXPORT_SYMBOL (posix_acl_to_xattr); -static int -posix_acl_xattr_get(const struct xattr_handler *handler, - struct dentry *unused, struct inode *inode, - const char *name, void *value, size_t size) -{ - struct posix_acl *acl; - int error; +/** + * vfs_posix_acl_to_xattr - convert from kernel to userspace representation + * @mnt_userns: user namespace of the mount + * @inode: inode the posix acls are set on + * @acl: the posix acls as represented by the vfs + * @buffer: the buffer into which to convert @acl + * @size: size of @buffer + * + * This converts @acl from the VFS representation in the filesystem idmapping + * to the uapi form reportable to userspace. And mount and caller idmappings + * are handled appropriately. + * + * Return: On success, the size of the stored uapi posix acls, on error a + * negative errno. 
+ */ +static ssize_t vfs_posix_acl_to_xattr(struct user_namespace *mnt_userns, + struct inode *inode, + const struct posix_acl *acl, void *buffer, + size_t size) - if (!IS_POSIXACL(inode)) - return -EOPNOTSUPP; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; +{ + struct posix_acl_xattr_header *ext_acl = buffer; + struct posix_acl_xattr_entry *ext_entry; + struct user_namespace *fs_userns, *caller_userns; + ssize_t real_size, n; + vfsuid_t vfsuid; + vfsgid_t vfsgid; - acl = get_acl(inode, handler->flags); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; + real_size = posix_acl_xattr_size(acl->a_count); + if (!buffer) + return real_size; + if (real_size > size) + return -ERANGE; - error = posix_acl_to_xattr(&init_user_ns, acl, value, size); - posix_acl_release(acl); + ext_entry = (void *)(ext_acl + 1); + ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); - return error; + fs_userns = i_user_ns(inode); + caller_userns = current_user_ns(); + for (n=0; n < acl->a_count; n++, ext_entry++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; + ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); + ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch(acl_e->e_tag) { + case ACL_USER: + vfsuid = make_vfsuid(mnt_userns, fs_userns, acl_e->e_uid); + ext_entry->e_id = cpu_to_le32(from_kuid( + caller_userns, vfsuid_into_kuid(vfsuid))); + break; + case ACL_GROUP: + vfsgid = make_vfsgid(mnt_userns, fs_userns, acl_e->e_gid); + ext_entry->e_id = cpu_to_le32(from_kgid( + caller_userns, vfsgid_into_kgid(vfsgid))); + break; + default: + ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + } + } + return real_size; } int -set_posix_acl(struct user_namespace *mnt_userns, struct inode *inode, +set_posix_acl(struct user_namespace *mnt_userns, struct dentry *dentry, int type, struct posix_acl *acl) { + struct inode *inode = d_inode(dentry); + if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) @@ -1157,40 +952,10 @@ 
set_posix_acl(struct user_namespace *mnt_userns, struct inode *inode, if (ret) return ret; } - return inode->i_op->set_acl(mnt_userns, inode, acl, type); + return inode->i_op->set_acl(mnt_userns, dentry, acl, type); } EXPORT_SYMBOL(set_posix_acl); -static int -posix_acl_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, - struct dentry *unused, struct inode *inode, - const char *name, const void *value, size_t size, - int flags) -{ - struct posix_acl *acl = NULL; - int ret; - - if (value) { - /* - * By the time we end up here the {g,u}ids stored in - * ACL_{GROUP,USER} have already been mapped according to the - * caller's idmapping. The vfs_set_acl_prepare() helper will - * recover them and take idmapped mounts into account. The - * filesystem will receive the POSIX ACLs in the correct - * format ready to be cached or written to the backing store - * taking the filesystem idmapping into account. - */ - acl = vfs_set_acl_prepare(mnt_userns, i_user_ns(inode), - value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - ret = set_posix_acl(mnt_userns, inode, handler->flags, acl); - posix_acl_release(acl); - return ret; -} - static bool posix_acl_xattr_list(struct dentry *dentry) { @@ -1201,8 +966,6 @@ const struct xattr_handler posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .list = posix_acl_xattr_list, - .get = posix_acl_xattr_get, - .set = posix_acl_xattr_set, }; EXPORT_SYMBOL_GPL(posix_acl_access_xattr_handler); @@ -1210,15 +973,14 @@ const struct xattr_handler posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .list = posix_acl_xattr_list, - .get = posix_acl_xattr_get, - .set = posix_acl_xattr_set, }; EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler); -int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int simple_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, 
int type) { int error; + struct inode *inode = d_inode(dentry); if (type == ACL_TYPE_ACCESS) { error = posix_acl_update_mode(mnt_userns, inode, @@ -1252,3 +1014,252 @@ int simple_acl_create(struct inode *dir, struct inode *inode) posix_acl_release(acl); return 0; } + +static int vfs_set_acl_idmapped_mnt(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + struct posix_acl *acl) +{ + for (int n = 0; n < acl->a_count; n++) { + struct posix_acl_entry *acl_e = &acl->a_entries[n]; + + switch (acl_e->e_tag) { + case ACL_USER: + acl_e->e_uid = from_vfsuid(mnt_userns, fs_userns, + VFSUIDT_INIT(acl_e->e_uid)); + break; + case ACL_GROUP: + acl_e->e_gid = from_vfsgid(mnt_userns, fs_userns, + VFSGIDT_INIT(acl_e->e_gid)); + break; + } + } + + return 0; +} + +/** + * vfs_set_acl - set posix acls + * @mnt_userns: user namespace of the mount + * @dentry: the dentry based on which to set the posix acls + * @acl_name: the name of the posix acl + * @kacl: the posix acls in the appropriate VFS format + * + * This function sets @kacl. The caller must call posix_acl_release() on @kacl + * afterwards. + * + * Return: On success 0, on error negative errno. + */ +int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl) +{ + int acl_type; + int error; + struct inode *inode = d_inode(dentry); + struct inode *delegated_inode = NULL; + + acl_type = posix_acl_type(acl_name); + if (acl_type < 0) + return -EINVAL; + + if (kacl) { + /* + * If we're on an idmapped mount translate from mount specific + * vfs{g,u}id_t into global filesystem k{g,u}id_t. + * Afterwards we can cache the POSIX ACLs filesystem wide and - + * if this is a filesystem with a backing store - ultimately + * translate them to backing store values. 
+ */ + error = vfs_set_acl_idmapped_mnt(mnt_userns, i_user_ns(inode), kacl); + if (error) + return error; + } + +retry_deleg: + inode_lock(inode); + + /* + * We only care about restrictions the inode struct itself places upon + * us otherwise POSIX ACLs aren't subject to any VFS restrictions. + */ + error = may_write_xattr(mnt_userns, inode); + if (error) + goto out_inode_unlock; + + error = security_inode_set_acl(mnt_userns, dentry, acl_name, kacl); + if (error) + goto out_inode_unlock; + + error = try_break_deleg(inode, &delegated_inode); + if (error) + goto out_inode_unlock; + + if (inode->i_opflags & IOP_XATTR) + error = set_posix_acl(mnt_userns, dentry, acl_type, kacl); + else if (unlikely(is_bad_inode(inode))) + error = -EIO; + else + error = -EOPNOTSUPP; + if (!error) { + fsnotify_xattr(dentry); + evm_inode_post_set_acl(dentry, acl_name, kacl); + } + +out_inode_unlock: + inode_unlock(inode); + + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + + return error; +} +EXPORT_SYMBOL_GPL(vfs_set_acl); + +/** + * vfs_get_acl - get posix acls + * @mnt_userns: user namespace of the mount + * @dentry: the dentry based on which to retrieve the posix acls + * @acl_name: the name of the posix acl + * + * This function retrieves @kacl from the filesystem. The caller must call + * posix_acl_release() on @kacl. + * + * Return: On success POSIX ACLs in VFS format, on error negative errno. + */ +struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) +{ + struct inode *inode = d_inode(dentry); + struct posix_acl *acl; + int acl_type, error; + + acl_type = posix_acl_type(acl_name); + if (acl_type < 0) + return ERR_PTR(-EINVAL); + + /* + * The VFS has no restrictions on reading POSIX ACLs so calling + * something like xattr_permission() isn't needed. Only LSMs get a say. 
+ */ + error = security_inode_get_acl(mnt_userns, dentry, acl_name); + if (error) + return ERR_PTR(error); + + if (!IS_POSIXACL(inode)) + return ERR_PTR(-EOPNOTSUPP); + if (S_ISLNK(inode->i_mode)) + return ERR_PTR(-EOPNOTSUPP); + + acl = __get_acl(mnt_userns, dentry, inode, acl_type); + if (IS_ERR(acl)) + return acl; + if (!acl) + return ERR_PTR(-ENODATA); + + return acl; +} +EXPORT_SYMBOL_GPL(vfs_get_acl); + +/** + * vfs_remove_acl - remove posix acls + * @mnt_userns: user namespace of the mount + * @dentry: the dentry based on which to retrieve the posix acls + * @acl_name: the name of the posix acl + * + * This function removes posix acls. + * + * Return: On success 0, on error negative errno. + */ +int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name) +{ + int acl_type; + int error; + struct inode *inode = d_inode(dentry); + struct inode *delegated_inode = NULL; + + acl_type = posix_acl_type(acl_name); + if (acl_type < 0) + return -EINVAL; + +retry_deleg: + inode_lock(inode); + + /* + * We only care about restrictions the inode struct itself places upon + * us otherwise POSIX ACLs aren't subject to any VFS restrictions. 
+ */ + error = may_write_xattr(mnt_userns, inode); + if (error) + goto out_inode_unlock; + + error = security_inode_remove_acl(mnt_userns, dentry, acl_name); + if (error) + goto out_inode_unlock; + + error = try_break_deleg(inode, &delegated_inode); + if (error) + goto out_inode_unlock; + + if (inode->i_opflags & IOP_XATTR) + error = set_posix_acl(mnt_userns, dentry, acl_type, NULL); + else if (unlikely(is_bad_inode(inode))) + error = -EIO; + else + error = -EOPNOTSUPP; + if (!error) { + fsnotify_xattr(dentry); + evm_inode_post_remove_acl(mnt_userns, dentry, acl_name); + } + +out_inode_unlock: + inode_unlock(inode); + + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + + return error; +} +EXPORT_SYMBOL_GPL(vfs_remove_acl); + +int do_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, const void *kvalue, size_t size) +{ + int error; + struct posix_acl *acl = NULL; + + if (size) { + /* + * Note that posix_acl_from_xattr() uses GFP_NOFS when it + * probably doesn't need to here. 
+ */ + acl = posix_acl_from_xattr(current_user_ns(), kvalue, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + + error = vfs_set_acl(mnt_userns, dentry, acl_name, acl); + posix_acl_release(acl); + return error; +} + +ssize_t do_get_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, void *kvalue, size_t size) +{ + ssize_t error; + struct posix_acl *acl; + + acl = vfs_get_acl(mnt_userns, dentry, acl_name); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + error = vfs_posix_acl_to_xattr(mnt_userns, d_inode(dentry), + acl, kvalue, size); + posix_acl_release(acl); + return error; +} diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index fa762c5fbcb2..91fe1597af7b 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include "internal.h" static int cmdline_proc_show(struct seq_file *m, void *v) { @@ -13,7 +14,10 @@ static int cmdline_proc_show(struct seq_file *m, void *v) static int __init proc_cmdline_init(void) { - proc_create_single("cmdline", 0, NULL, cmdline_proc_show); + struct proc_dir_entry *pde; + + pde = proc_create_single("cmdline", 0, NULL, cmdline_proc_show); + pde->size = saved_command_line_len + 1; return 0; } fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index dfe6ce3505ce..e0758fe7936d 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -33,7 +33,16 @@ static int show_console_dev(struct seq_file *m, void *v) if (con->device) { const struct tty_driver *driver; int index; + + /* + * Take console_lock to serialize device() callback with + * other console operations. For example, fg_console is + * modified under console_lock when switching vt. 
+ */ + console_lock(); driver = con->device(con, &index); + console_unlock(); + if (driver) { dev = MKDEV(driver->major, driver->minor_start); dev += index; @@ -63,7 +72,12 @@ static void *c_start(struct seq_file *m, loff_t *pos) struct console *con; loff_t off = 0; - console_lock(); + /* + * Hold the console_list_lock to guarantee safe traversal of the + * console list. SRCU cannot be used because there is no + * place to store the SRCU cookie. + */ + console_list_lock(); for_each_console(con) if (off++ == *pos) break; @@ -74,13 +88,14 @@ static void *c_start(struct seq_file *m, loff_t *pos) static void *c_next(struct seq_file *m, void *v, loff_t *pos) { struct console *con = v; + ++*pos; - return con->next; + return hlist_entry_safe(con->node.next, struct console, node); } static void c_stop(struct seq_file *m, void *v) { - console_unlock(); + console_list_unlock(); } static const struct seq_operations consoles_op = { diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 913bef0d2a36..fc46d6fe080c 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -7,6 +7,7 @@ #include <linux/namei.h> #include <linux/pid.h> #include <linux/ptrace.h> +#include <linux/bitmap.h> #include <linux/security.h> #include <linux/file.h> #include <linux/seq_file.h> @@ -279,6 +280,30 @@ out: return 0; } +static int proc_readfd_count(struct inode *inode, loff_t *count) +{ + struct task_struct *p = get_proc_task(inode); + struct fdtable *fdt; + + if (!p) + return -ENOENT; + + task_lock(p); + if (p->files) { + rcu_read_lock(); + + fdt = files_fdtable(p->files); + *count = bitmap_weight(fdt->open_fds, fdt->max_fds); + + rcu_read_unlock(); + } + task_unlock(p); + + put_task_struct(p); + + return 0; +} + static int proc_readfd(struct file *file, struct dir_context *ctx) { return proc_readfd_common(file, ctx, proc_fd_instantiate); @@ -319,9 +344,29 @@ int proc_fd_permission(struct user_namespace *mnt_userns, return rv; } +static int proc_fd_getattr(struct user_namespace *mnt_userns, + const struct path 
*path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + int rv = 0; + + generic_fillattr(&init_user_ns, inode, stat); + + /* If it's a directory, put the number of open fds there */ + if (S_ISDIR(inode->i_mode)) { + rv = proc_readfd_count(inode, &stat->size); + if (rv < 0) + return rv; + } + + return rv; +} + const struct inode_operations proc_fd_inode_operations = { .lookup = proc_lookupfd, .permission = proc_fd_permission, + .getattr = proc_fd_getattr, .setattr = proc_setattr, }; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5101131e6047..440960110a42 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif show_val_kb(m, "PageTables: ", global_node_page_state(NR_PAGETABLE)); - show_val_kb(m, "SecPageTables: ", + show_val_kb(m, "SecPageTables: ", global_node_page_state(NR_SECONDARY_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8b4f3073f8f5..8a74cdcc9af0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -902,7 +902,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) goto out_put_mm; hold_task_mempolicy(priv); - vma = mas_find(&mas, 0); + vma = mas_find(&mas, ULONG_MAX); if (unlikely(!vma)) goto empty_set; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index f2aa86c421f2..09a81e4b1273 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -199,7 +199,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) struct kvec kvec = { .iov_base = buf, .iov_len = count }; struct iov_iter iter; - iov_iter_kvec(&iter, READ, &kvec, 1, count); + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); return read_from_oldmem(&iter, count, ppos, false); } @@ -212,7 +212,7 @@ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) struct kvec kvec = { .iov_base = buf, .iov_len = count }; struct iov_iter iter; 
- iov_iter_kvec(&iter, READ, &kvec, 1, count); + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); return read_from_oldmem(&iter, count, ppos, cc_platform_has(CC_ATTR_MEM_ENCRYPT)); @@ -437,7 +437,7 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) offset = (loff_t) index << PAGE_SHIFT; kvec.iov_base = page_address(page); kvec.iov_len = PAGE_SIZE; - iov_iter_kvec(&iter, READ, &kvec, 1, PAGE_SIZE); + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, PAGE_SIZE); rc = __read_vmcore(&iter, &offset); if (rc < 0) { @@ -1567,6 +1567,7 @@ static int __init vmcore_init(void) return rc; rc = parse_crash_elf_headers(); if (rc) { + elfcorehdr_free(elfcorehdr_addr); pr_warn("Kdump: vmcore not initialized\n"); return rc; } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 0c034ea39954..cbc0b468c1ab 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -89,6 +89,11 @@ static char *compress = module_param(compress, charp, 0444); MODULE_PARM_DESC(compress, "compression to use"); +/* How much of the kernel log to snapshot */ +unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES; +module_param(kmsg_bytes, ulong, 0444); +MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)"); + /* Compression parameters */ static struct crypto_comp *tfm; @@ -100,9 +105,6 @@ struct pstore_zbackend { static char *big_oops_buf; static size_t big_oops_buf_sz; -/* How much of the console log to snapshot */ -unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES; - void pstore_set_kmsg_bytes(int bytes) { kmsg_bytes = bytes; @@ -391,6 +393,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, const char *why; unsigned int part = 1; unsigned long flags = 0; + int saved_ret = 0; int ret; why = kmsg_dump_reason_str(reason); @@ -461,12 +464,21 @@ static void pstore_dump(struct kmsg_dumper *dumper, if (ret == 0 && reason == KMSG_DUMP_OOPS) { pstore_new_entry = 1; pstore_timer_kick(); + } else { + /* Preserve only the first non-zero returned value. 
*/ + if (!saved_ret) + saved_ret = ret; } total += record.size; part++; } spin_unlock_irqrestore(&psinfo->buf_lock, flags); + + if (saved_ret) { + pr_err_once("backend (%s) writing error (%d)\n", psinfo->name, + saved_ret); + } } static struct kmsg_dumper pstore_dumper = { @@ -562,8 +574,9 @@ out: int pstore_register(struct pstore_info *psi) { if (backend && strcmp(backend, psi->name)) { - pr_warn("ignoring unexpected backend '%s'\n", psi->name); - return -EPERM; + pr_warn("backend '%s' already in use: ignoring '%s'\n", + backend, psi->name); + return -EBUSY; } /* Sanity check flags. */ @@ -662,6 +675,8 @@ void pstore_unregister(struct pstore_info *psi) psinfo = NULL; kfree(backend); backend = NULL; + + pr_info("Unregistered %s as persistent store backend\n", psi->name); mutex_unlock(&psinfo_lock); } EXPORT_SYMBOL_GPL(pstore_unregister); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index fefe3d391d3a..9a5052431fd3 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -18,10 +18,11 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/compiler.h> -#include <linux/pstore_ram.h> #include <linux/of.h> #include <linux/of_address.h> + #include "internal.h" +#include "ram_internal.h" #define RAMOOPS_KERNMSG_HDR "====" #define MIN_MEM_SIZE 4096UL @@ -451,20 +452,28 @@ static void ramoops_free_przs(struct ramoops_context *cxt) { int i; + /* Free pmsg PRZ */ + persistent_ram_free(&cxt->mprz); + + /* Free console PRZ */ + persistent_ram_free(&cxt->cprz); + /* Free dump PRZs */ if (cxt->dprzs) { for (i = 0; i < cxt->max_dump_cnt; i++) - persistent_ram_free(cxt->dprzs[i]); + persistent_ram_free(&cxt->dprzs[i]); kfree(cxt->dprzs); + cxt->dprzs = NULL; cxt->max_dump_cnt = 0; } /* Free ftrace PRZs */ if (cxt->fprzs) { for (i = 0; i < cxt->max_ftrace_cnt; i++) - persistent_ram_free(cxt->fprzs[i]); + persistent_ram_free(&cxt->fprzs[i]); kfree(cxt->fprzs); + cxt->fprzs = NULL; cxt->max_ftrace_cnt = 0; } } @@ -548,9 +557,10 @@ static int 
ramoops_init_przs(const char *name, while (i > 0) { i--; - persistent_ram_free(prz_ar[i]); + persistent_ram_free(&prz_ar[i]); } kfree(prz_ar); + prz_ar = NULL; goto fail; } *paddr += zone_sz; @@ -735,6 +745,7 @@ static int ramoops_probe(struct platform_device *pdev) /* Make sure we didn't get bogus platform data pointer. */ if (!pdata) { pr_err("NULL platform data\n"); + err = -EINVAL; goto fail_out; } @@ -742,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev) !pdata->ftrace_size && !pdata->pmsg_size)) { pr_err("The memory size and the record/console size must be " "non-zero\n"); + err = -EINVAL; goto fail_out; } @@ -772,12 +784,17 @@ static int ramoops_probe(struct platform_device *pdev) dump_mem_sz, cxt->record_size, &cxt->max_dump_cnt, 0, 0); if (err) - goto fail_out; + goto fail_init; err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr, cxt->console_size, 0); if (err) - goto fail_init_cprz; + goto fail_init; + + err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr, + cxt->pmsg_size, 0); + if (err) + goto fail_init; cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) ? nr_cpu_ids @@ -788,12 +805,7 @@ static int ramoops_probe(struct platform_device *pdev) (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) ? 
PRZ_FLAG_NO_LOCK : 0); if (err) - goto fail_init_fprz; - - err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr, - cxt->pmsg_size, 0); - if (err) - goto fail_init_mprz; + goto fail_init; cxt->pstore.data = cxt; /* @@ -857,11 +869,7 @@ fail_buf: kfree(cxt->pstore.buf); fail_clear: cxt->pstore.bufsize = 0; - persistent_ram_free(cxt->mprz); -fail_init_mprz: -fail_init_fprz: - persistent_ram_free(cxt->cprz); -fail_init_cprz: +fail_init: ramoops_free_przs(cxt); fail_out: return err; @@ -876,8 +884,6 @@ static int ramoops_remove(struct platform_device *pdev) kfree(cxt->pstore.buf); cxt->pstore.bufsize = 0; - persistent_ram_free(cxt->mprz); - persistent_ram_free(cxt->cprz); ramoops_free_przs(cxt); return 0; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index a89e33719fcf..966191d3a5ba 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -13,13 +13,14 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/memblock.h> -#include <linux/pstore_ram.h> #include <linux/rslib.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <asm/page.h> +#include "ram_internal.h" + /** * struct persistent_ram_buffer - persistent circular RAM buffer * @@ -439,7 +440,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, phys_addr_t addr = page_start + i * PAGE_SIZE; pages[i] = pfn_to_page(addr >> PAGE_SHIFT); } - vaddr = vmap(pages, page_count, VM_MAP, prot); + /* + * VM_IOREMAP used here to bypass this region during vread() + * and kmap_atomic() (i.e. kcore) to avoid __va() failures. 
+ */ + vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot); kfree(pages); /* @@ -543,8 +548,14 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, return 0; } -void persistent_ram_free(struct persistent_ram_zone *prz) +void persistent_ram_free(struct persistent_ram_zone **_prz) { + struct persistent_ram_zone *prz; + + if (!_prz) + return; + + prz = *_prz; if (!prz) return; @@ -568,6 +579,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) persistent_ram_free_old(prz); kfree(prz->label); kfree(prz); + *_prz = NULL; } struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, @@ -604,6 +616,6 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, return prz; err: - persistent_ram_free(prz); + persistent_ram_free(&prz); return ERR_PTR(ret); } diff --git a/fs/pstore/ram_internal.h b/fs/pstore/ram_internal.h new file mode 100644 index 000000000000..5f694698351f --- /dev/null +++ b/fs/pstore/ram_internal.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com> + * Copyright (C) 2011 Kees Cook <keescook@chromium.org> + * Copyright (C) 2011 Google, Inc. + */ + +#include <linux/pstore_ram.h> + +/* + * Choose whether access to the RAM zone requires locking or not. If a zone + * can be written to from different CPUs like with ftrace for example, then + * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. + */ +#define PRZ_FLAG_NO_LOCK BIT(0) +/* + * If a PRZ should only have a single-boot lifetime, this marks it as + * getting wiped after its contents get copied out after boot. 
+ */ +#define PRZ_FLAG_ZAP_OLD BIT(1) + +/** + * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ) + * used as a pstore backend + * + * @paddr: physical address of the mapped RAM area + * @size: size of mapping + * @label: unique name of this PRZ + * @type: frontend type for this PRZ + * @flags: holds PRZ_FLAGS_* bits + * + * @buffer_lock: + * locks access to @buffer "size" bytes and "start" offset + * @buffer: + * pointer to actual RAM area managed by this PRZ + * @buffer_size: + * bytes in @buffer->data (not including any trailing ECC bytes) + * + * @par_buffer: + * pointer into @buffer->data containing ECC bytes for @buffer->data + * @par_header: + * pointer into @buffer->data containing ECC bytes for @buffer header + * (i.e. all fields up to @data) + * @rs_decoder: + * RSLIB instance for doing ECC calculations + * @corrected_bytes: + * ECC corrected bytes accounting since boot + * @bad_blocks: + * ECC uncorrectable bytes accounting since boot + * @ecc_info: + * ECC configuration details + * + * @old_log: + * saved copy of @buffer->data prior to most recent wipe + * @old_log_size: + * bytes contained in @old_log + * + */ +struct persistent_ram_zone { + phys_addr_t paddr; + size_t size; + void *vaddr; + char *label; + enum pstore_type_id type; + u32 flags; + + raw_spinlock_t buffer_lock; + struct persistent_ram_buffer *buffer; + size_t buffer_size; + + char *par_buffer; + char *par_header; + struct rs_control *rs_decoder; + int corrected_bytes; + int bad_blocks; + struct persistent_ram_ecc_info ecc_info; + + char *old_log; + size_t old_log_size; +}; + +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype, u32 flags, char *label); +void persistent_ram_free(struct persistent_ram_zone **_prz); +void persistent_ram_zap(struct persistent_ram_zone *prz); + +int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, + unsigned int count); 
+int persistent_ram_write_user(struct persistent_ram_zone *prz, + const void __user *s, unsigned int count); + +void persistent_ram_save_old(struct persistent_ram_zone *prz); +size_t persistent_ram_old_size(struct persistent_ram_zone *prz); +void *persistent_ram_old(struct persistent_ram_zone *prz); +void persistent_ram_free_old(struct persistent_ram_zone *prz); +ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, + char *str, size_t len); diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 017d0d4ad329..2770746bb7aa 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, /* avoid destroying old data, allocate a new one */ len = zone->buffer_size + sizeof(*zone->buffer); zone->oldbuf = zone->buffer; - zone->buffer = kzalloc(len, GFP_KERNEL); + zone->buffer = kzalloc(len, GFP_ATOMIC); if (!zone->buffer) { zone->buffer = zone->oldbuf; return -ENOMEM; diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..7a2ff6157eda 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -384,7 +384,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? *ppos : 0); - iov_iter_ubuf(&iter, READ, buf, len); + iov_iter_ubuf(&iter, ITER_DEST, buf, len); ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); @@ -424,7 +424,7 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos ? *pos : 0; - iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); + iov_iter_kvec(&iter, ITER_DEST, &iov, 1, iov.iov_len); ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { if (pos) @@ -486,7 +486,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? 
*ppos : 0); - iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len); + iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)buf, len); ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); @@ -533,7 +533,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t .iov_len = min_t(size_t, count, MAX_RW_COUNT), }; struct iov_iter iter; - iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); + iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, iov.iov_len); return __kernel_write_iter(file, &iter, pos); } /* @@ -911,7 +911,7 @@ static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, struct iov_iter iter; ssize_t ret; - ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); + ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); if (ret >= 0) { ret = do_iter_read(file, &iter, pos, flags); kfree(iov); @@ -928,7 +928,7 @@ static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, struct iov_iter iter; ssize_t ret; - ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); + ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); if (ret >= 0) { file_start_write(file); ret = do_iter_write(file, &iter, pos, flags); @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. 
*/ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range(). + * + * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. 
*/ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h index d9052b8ce6dd..29c503a06db4 100644 --- a/fs/reiserfs/acl.h +++ b/fs/reiserfs/acl.h @@ -49,9 +49,9 @@ static inline int reiserfs_acl_count(size_t size) #ifdef CONFIG_REISERFS_FS_POSIX_ACL struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu); -int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +int reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); -int reiserfs_acl_chmod(struct inode *inode); +int reiserfs_acl_chmod(struct dentry *dentry); int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, struct inode *dir, struct dentry *dentry, struct inode *inode); @@ -63,7 +63,7 @@ int reiserfs_cache_default_acl(struct inode *dir); #define reiserfs_get_acl NULL #define reiserfs_set_acl NULL -static inline int reiserfs_acl_chmod(struct inode *inode) +static inline int reiserfs_acl_chmod(struct dentry *dentry) { return 0; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6e228bfbe7ef..467d13da198f 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -256,7 +256,7 @@ const struct inode_operations reiserfs_file_inode_operations = { .setattr = reiserfs_setattr, .listxattr = reiserfs_listxattr, .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, + .get_inode_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, .fileattr_get = reiserfs_fileattr_get, .fileattr_set = reiserfs_fileattr_set, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index b9580a6515ee..c7d1fa526dea 100644 --- a/fs/reiserfs/inode.c +++ 
b/fs/reiserfs/inode.c @@ -3404,7 +3404,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (!error && reiserfs_posixacl(inode->i_sb)) { if (attr->ia_valid & ATTR_MODE) - error = reiserfs_acl_chmod(inode); + error = reiserfs_acl_chmod(dentry); } out: diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 3d7a35d6a18b..4d428e8704bc 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1659,7 +1659,7 @@ const struct inode_operations reiserfs_dir_inode_operations = { .setattr = reiserfs_setattr, .listxattr = reiserfs_listxattr, .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, + .get_inode_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, .fileattr_get = reiserfs_fileattr_get, .fileattr_set = reiserfs_fileattr_set, @@ -1683,6 +1683,6 @@ const struct inode_operations reiserfs_special_inode_operations = { .setattr = reiserfs_setattr, .listxattr = reiserfs_listxattr, .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, + .get_inode_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, }; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index d6fcddc46f5b..93fe414fed18 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -18,7 +18,7 @@ static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th, int -reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +reiserfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { int error, error2; @@ -26,6 +26,7 @@ reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, size_t jcreate_blocks; int size = acl ? 
posix_acl_xattr_size(acl->a_count) : 0; int update_mode = 0; + struct inode *inode = d_inode(dentry); umode_t mode = inode->i_mode; /* @@ -371,7 +372,7 @@ int reiserfs_cache_default_acl(struct inode *inode) if (IS_PRIVATE(inode)) return 0; - acl = get_acl(inode, ACL_TYPE_DEFAULT); + acl = get_inode_acl(inode, ACL_TYPE_DEFAULT); if (acl && !IS_ERR(acl)) { int size = reiserfs_acl_size(acl->a_count); @@ -396,13 +397,15 @@ int reiserfs_cache_default_acl(struct inode *inode) /* * Called under i_mutex */ -int reiserfs_acl_chmod(struct inode *inode) +int reiserfs_acl_chmod(struct dentry *dentry) { + struct inode *inode = d_inode(dentry); + if (IS_PRIVATE(inode)) return 0; if (get_inode_sd_version(inode) == STAT_DATA_V1 || !reiserfs_posixacl(inode->i_sb)) return 0; - return posix_acl_chmod(&init_user_ns, inode, inode->i_mode); + return posix_acl_chmod(&init_user_ns, dentry, inode->i_mode); } diff --git a/fs/seq_file.c b/fs/seq_file.c index 9456a2032224..f5fdaf3b1572 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -156,7 +156,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) ssize_t ret; init_sync_kiocb(&kiocb, file); - iov_iter_init(&iter, READ, &iov, 1, size); + iov_iter_init(&iter, ITER_DEST, &iov, 1, size); kiocb.ki_pos = *ppos; ret = seq_read_iter(&kiocb, &iter); diff --git a/fs/splice.c b/fs/splice.c index 0878b852b355..5969b7a1d353 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -303,7 +303,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct kiocb kiocb; int ret; - iov_iter_pipe(&to, READ, pipe, len); + iov_iter_pipe(&to, ITER_DEST, pipe, len); init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); @@ -682,7 +682,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, n++; } - iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); + iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left); ret = vfs_iter_write(out, &from, &sd.pos, 0); if 
(ret <= 0) break; @@ -1263,9 +1263,9 @@ static int vmsplice_type(struct fd f, int *type) if (!f.file) return -EBADF; if (f.file->f_mode & FMODE_WRITE) { - *type = WRITE; + *type = ITER_SOURCE; } else if (f.file->f_mode & FMODE_READ) { - *type = READ; + *type = ITER_DEST; } else { fdput(f); return -EBADF; @@ -1314,7 +1314,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, if (!iov_iter_count(&iter)) error = 0; - else if (iov_iter_rw(&iter) == WRITE) + else if (type == ITER_SOURCE) error = vmsplice_to_pipe(f.file, &iter, flags); else error = vmsplice_to_user(f.file, &iter, flags); diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 916e78fabcaa..60fc98bdf421 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -54,9 +54,35 @@ config SQUASHFS_FILE_DIRECT endchoice +config SQUASHFS_DECOMP_SINGLE + depends on SQUASHFS + def_bool n + +config SQUASHFS_DECOMP_MULTI + depends on SQUASHFS + def_bool n + +config SQUASHFS_DECOMP_MULTI_PERCPU + depends on SQUASHFS + def_bool n + +config SQUASHFS_CHOICE_DECOMP_BY_MOUNT + bool "Select the parallel decompression mode during mount" + depends on SQUASHFS + default n + select SQUASHFS_DECOMP_SINGLE + select SQUASHFS_DECOMP_MULTI + select SQUASHFS_DECOMP_MULTI_PERCPU + select SQUASHFS_MOUNT_DECOMP_THREADS + help + Compile all parallel decompression modes and specify the + decompression mode by setting "threads=" during mount. + default Decompressor parallelisation is SQUASHFS_DECOMP_SINGLE + choice - prompt "Decompressor parallelisation options" + prompt "Select decompression parallel mode at compile time" depends on SQUASHFS + depends on !SQUASHFS_CHOICE_DECOMP_BY_MOUNT help Squashfs now supports three parallelisation options for decompression. 
Each one exhibits various trade-offs between @@ -64,15 +90,17 @@ choice If in doubt, select "Single threaded compression" -config SQUASHFS_DECOMP_SINGLE +config SQUASHFS_COMPILE_DECOMP_SINGLE bool "Single threaded compression" + select SQUASHFS_DECOMP_SINGLE help Traditionally Squashfs has used single-threaded decompression. Only one block (data or metadata) can be decompressed at any one time. This limits CPU and memory usage to a minimum. -config SQUASHFS_DECOMP_MULTI +config SQUASHFS_COMPILE_DECOMP_MULTI bool "Use multiple decompressors for parallel I/O" + select SQUASHFS_DECOMP_MULTI help By default Squashfs uses a single decompressor but it gives poor performance on parallel I/O workloads when using multiple CPU @@ -85,8 +113,9 @@ config SQUASHFS_DECOMP_MULTI decompressors per core. It dynamically allocates decompressors on a demand basis. -config SQUASHFS_DECOMP_MULTI_PERCPU +config SQUASHFS_COMPILE_DECOMP_MULTI_PERCPU bool "Use percpu multiple decompressors for parallel I/O" + select SQUASHFS_DECOMP_MULTI_PERCPU help By default Squashfs uses a single decompressor but it gives poor performance on parallel I/O workloads when using multiple CPU @@ -95,9 +124,21 @@ config SQUASHFS_DECOMP_MULTI_PERCPU This decompressor implementation uses a maximum of one decompressor per core. It uses percpu variables to ensure decompression is load-balanced across the cores. - endchoice +config SQUASHFS_MOUNT_DECOMP_THREADS + bool "Add the mount parameter 'threads=' for squashfs" + depends on SQUASHFS + depends on SQUASHFS_DECOMP_MULTI + default n + help + Use threads= to set the decompression parallel mode and the number of threads. + If SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y + threads=<single|multi|percpu|1|2|3|...> + else + threads=<2|3|...> + The upper limit is num_online_cpus() * 2. 
+ config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 833aca92301f..bed3bb8b27fa 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -216,7 +216,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, res = -EIO; goto out_free_bio; } - res = squashfs_decompress(msblk, bio, offset, length, output); + res = msblk->thread_ops->decompress(msblk, bio, offset, length, output); } else { res = copy_bio_to_actor(bio, output, offset, length); } diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index d57bef91ab08..8893cb9b4198 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -134,7 +134,7 @@ void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) if (IS_ERR(comp_opts)) return comp_opts; - stream = squashfs_decompressor_create(msblk, comp_opts); + stream = msblk->thread_ops->create(msblk, comp_opts); if (IS_ERR(stream)) kfree(comp_opts); diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index db9f12a3ea05..416c53eedbd1 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -29,12 +29,11 @@ #define MAX_DECOMPRESSOR (num_online_cpus() * 2) -int squashfs_max_decompressors(void) +static int squashfs_max_decompressors(void) { return MAX_DECOMPRESSOR; } - struct squashfs_stream { void *comp_opts; struct list_head strm_list; @@ -59,7 +58,7 @@ static void put_decomp_stream(struct decomp_stream *decomp_strm, wake_up(&stream->wait); } -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, +static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, void *comp_opts) { struct squashfs_stream *stream; @@ -103,7 +102,7 @@ out: } -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) { struct squashfs_stream *stream = msblk->stream; if 
(stream) { @@ -145,7 +144,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, * If there is no available decomp and already full, * let's wait for releasing decomp from other users. */ - if (stream->avail_decomp >= MAX_DECOMPRESSOR) + if (stream->avail_decomp >= msblk->max_thread_num) goto wait; /* Let's allocate new decomp */ @@ -161,7 +160,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, } stream->avail_decomp++; - WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + WARN_ON(stream->avail_decomp > msblk->max_thread_num); mutex_unlock(&stream->mutex); break; @@ -180,7 +179,7 @@ wait: } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, +static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { @@ -195,3 +194,10 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, msblk->decompressor->name); return res; } + +const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi = { + .create = squashfs_decompressor_create, + .destroy = squashfs_decompressor_destroy, + .decompress = squashfs_decompress, + .max_decompressors = squashfs_max_decompressors, +}; diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index b881b9283b7f..1dfadf76ed9a 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -25,7 +25,7 @@ struct squashfs_stream { local_lock_t lock; }; -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, +static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, void *comp_opts) { struct squashfs_stream *stream; @@ -59,7 +59,7 @@ out: return ERR_PTR(err); } -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) { struct squashfs_stream __percpu *percpu = (struct 
squashfs_stream __percpu *) msblk->stream; @@ -75,19 +75,21 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, +static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; int res; - local_lock(&msblk->stream->lock); - stream = this_cpu_ptr(msblk->stream); + local_lock(&percpu->lock); + stream = this_cpu_ptr(percpu); res = msblk->decompressor->decompress(msblk, stream->stream, bio, offset, length, output); - local_unlock(&msblk->stream->lock); + local_unlock(&percpu->lock); if (res < 0) ERROR("%s decompression failed, data probably corrupt\n", @@ -96,7 +98,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, return res; } -int squashfs_max_decompressors(void) +static int squashfs_max_decompressors(void) { return num_possible_cpus(); } + +const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu = { + .create = squashfs_decompressor_create, + .destroy = squashfs_decompressor_destroy, + .decompress = squashfs_decompress, + .max_decompressors = squashfs_max_decompressors, +}; diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index 4eb3d083d45e..6f161887710b 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -24,7 +24,7 @@ struct squashfs_stream { struct mutex mutex; }; -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, +static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, void *comp_opts) { struct squashfs_stream *stream; @@ -49,7 +49,7 @@ out: return ERR_PTR(err); } -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) { struct 
squashfs_stream *stream = msblk->stream; @@ -59,7 +59,7 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, +static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { @@ -78,7 +78,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, return res; } -int squashfs_max_decompressors(void) +static int squashfs_max_decompressors(void) { return 1; } + +const struct squashfs_decompressor_thread_ops squashfs_decompressor_single = { + .create = squashfs_decompressor_create, + .destroy = squashfs_decompressor_destroy, + .decompress = squashfs_decompress, + .max_decompressors = squashfs_max_decompressors, +}; diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e56510964b22..8ba8c4c50770 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page, squashfs_i(inode)->fragment_size); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; + int error = buffer->error; - if (buffer->error) + if (error) goto out; expected += squashfs_i(inode)->fragment_offset; @@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page, out: squashfs_cache_put(buffer); - return buffer->error; + return error; } static void squashfs_readahead(struct readahead_control *ractl) @@ -557,6 +558,13 @@ static void squashfs_readahead(struct readahead_control *ractl) int res, bsize; u64 block = 0; unsigned int expected; + struct page *last_page; + + expected = start >> msblk->block_log == file_end ? 
+ (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + + max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT; nr_pages = __readahead_batch(ractl, pages, max_pages); if (!nr_pages) @@ -566,13 +574,10 @@ static void squashfs_readahead(struct readahead_control *ractl) goto skip_pages; index = pages[0]->index >> shift; + if ((pages[nr_pages - 1]->index >> shift) != index) goto skip_pages; - expected = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - if (index == file_end && squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) { res = squashfs_readahead_fragment(pages, nr_pages, @@ -593,15 +598,15 @@ static void squashfs_readahead(struct readahead_control *ractl) res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - squashfs_page_actor_free(actor); + last_page = squashfs_page_actor_free(actor); if (res == expected) { int bytes; /* Last page (if present) may have trailing bytes not filled */ bytes = res % PAGE_SIZE; - if (pages[nr_pages - 1]->index == file_end && bytes) - memzero_page(pages[nr_pages - 1], bytes, + if (index == file_end && bytes && last_page) + memzero_page(last_page, bytes, PAGE_SIZE - bytes); for (i = 0; i < nr_pages; i++) { diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 54b93bf4a25c..81af6c4ca115 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -71,11 +71,13 @@ static void *handle_next_page(struct squashfs_page_actor *actor) (actor->next_index != actor->page[actor->next_page]->index)) { actor->next_index++; actor->returned_pages++; + actor->last_page = NULL; return actor->alloc_buffer ? 
actor->tmp_buffer : ERR_PTR(-ENOMEM); } actor->next_index++; actor->returned_pages++; + actor->last_page = actor->page[actor->next_page]; return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]); } @@ -125,6 +127,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ actor->returned_pages = 0; actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; + actor->last_page = NULL; actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 95ffbb543d91..97d4983559b1 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -16,6 +16,7 @@ struct squashfs_page_actor { void *(*squashfs_first_page)(struct squashfs_page_actor *); void *(*squashfs_next_page)(struct squashfs_page_actor *); void (*squashfs_finish_page)(struct squashfs_page_actor *); + struct page *last_page; int pages; int length; int next_page; @@ -29,10 +30,13 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, extern struct squashfs_page_actor *squashfs_page_actor_init_special( struct squashfs_sb_info *msblk, struct page **page, int pages, int length); -static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor) +static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor) { + struct page *last_page = actor->last_page; + kfree(actor->tmp_buffer); kfree(actor); + return last_page; } static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9783e01c8100..a6164fdf9435 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -38,11 +38,24 @@ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); extern void *squashfs_decompressor_setup(struct super_block 
*, unsigned short); /* decompressor_xxx.c */ -extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); -extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); -extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *, - int, int, struct squashfs_page_actor *); -extern int squashfs_max_decompressors(void); + +struct squashfs_decompressor_thread_ops { + void * (*create)(struct squashfs_sb_info *msblk, void *comp_opts); + void (*destroy)(struct squashfs_sb_info *msblk); + int (*decompress)(struct squashfs_sb_info *msblk, struct bio *bio, + int offset, int length, struct squashfs_page_actor *output); + int (*max_decompressors)(void); +}; + +#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE +extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_single; +#endif +#ifdef CONFIG_SQUASHFS_DECOMP_MULTI +extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi; +#endif +#ifdef CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU +extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu; +#endif /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1e90c2575f9b..659082e9e51d 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -53,7 +53,7 @@ struct squashfs_sb_info { __le64 *xattr_id_table; struct mutex meta_index_mutex; struct meta_index *meta_index; - struct squashfs_stream *stream; + void *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; @@ -66,5 +66,7 @@ struct squashfs_sb_info { int xattr_ids; unsigned int ids; bool panic_on_errors; + const struct squashfs_decompressor_thread_ops *thread_ops; + int max_thread_num; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 32565dafa7f3..7d5265a39d20 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -47,10 +47,13 @@ enum Opt_errors { enum 
squashfs_param { Opt_errors, + Opt_threads, }; struct squashfs_mount_opts { enum Opt_errors errors; + const struct squashfs_decompressor_thread_ops *thread_ops; + int thread_num; }; static const struct constant_table squashfs_param_errors[] = { @@ -61,9 +64,66 @@ static const struct constant_table squashfs_param_errors[] = { static const struct fs_parameter_spec squashfs_fs_parameters[] = { fsparam_enum("errors", Opt_errors, squashfs_param_errors), + fsparam_string("threads", Opt_threads), {} }; + +static int squashfs_parse_param_threads_str(const char *str, struct squashfs_mount_opts *opts) +{ +#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT + if (strcmp(str, "single") == 0) { + opts->thread_ops = &squashfs_decompressor_single; + return 0; + } + if (strcmp(str, "multi") == 0) { + opts->thread_ops = &squashfs_decompressor_multi; + return 0; + } + if (strcmp(str, "percpu") == 0) { + opts->thread_ops = &squashfs_decompressor_percpu; + return 0; + } +#endif + return -EINVAL; +} + +static int squashfs_parse_param_threads_num(const char *str, struct squashfs_mount_opts *opts) +{ +#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS + int ret; + unsigned long num; + + ret = kstrtoul(str, 0, &num); + if (ret != 0) + return -EINVAL; + if (num > 1) { + opts->thread_ops = &squashfs_decompressor_multi; + if (num > opts->thread_ops->max_decompressors()) + return -EINVAL; + opts->thread_num = (int)num; + return 0; + } +#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE + if (num == 1) { + opts->thread_ops = &squashfs_decompressor_single; + opts->thread_num = 1; + return 0; + } +#endif +#endif /* !CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS */ + return -EINVAL; +} + +static int squashfs_parse_param_threads(const char *str, struct squashfs_mount_opts *opts) +{ + int ret = squashfs_parse_param_threads_str(str, opts); + + if (ret == 0) + return ret; + return squashfs_parse_param_threads_num(str, opts); +} + static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct 
squashfs_mount_opts *opts = fc->fs_private; @@ -78,6 +138,10 @@ static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *para case Opt_errors: opts->errors = result.uint_32; break; + case Opt_threads: + if (squashfs_parse_param_threads(param->string, opts) != 0) + return -EINVAL; + break; default: return -EINVAL; } @@ -133,6 +197,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) return -ENOMEM; } msblk = sb->s_fs_info; + msblk->thread_ops = opts->thread_ops; msblk->panic_on_errors = (opts->errors == Opt_errors_panic); @@ -168,6 +233,12 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) goto failed_mount; } + if (opts->thread_num == 0) { + msblk->max_thread_num = msblk->thread_ops->max_decompressors(); + } else { + msblk->max_thread_num = opts->thread_num; + } + /* Check the MAJOR & MINOR versions and lookup compression type */ msblk->decompressor = supported_squashfs_filesystem( fc, @@ -252,7 +323,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) /* Allocate read_page block */ msblk->read_page = squashfs_cache_init("data", - squashfs_max_decompressors(), msblk->block_size); + msblk->max_thread_num, msblk->block_size); if (msblk->read_page == NULL) { errorf(fc, "Failed to allocate read_page block"); goto failed_mount; @@ -383,7 +454,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_destroy(msblk); + msblk->thread_ops->destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -435,6 +506,19 @@ static int squashfs_show_options(struct seq_file *s, struct dentry *root) else seq_puts(s, ",errors=continue"); +#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT + if (msblk->thread_ops == &squashfs_decompressor_single) { + seq_puts(s, ",threads=single"); + return 0; + } + if (msblk->thread_ops == 
&squashfs_decompressor_percpu) { + seq_puts(s, ",threads=percpu"); + return 0; + } +#endif +#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS + seq_printf(s, ",threads=%d", msblk->max_thread_num); +#endif return 0; } @@ -446,6 +530,16 @@ static int squashfs_init_fs_context(struct fs_context *fc) if (!opts) return -ENOMEM; +#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE + opts->thread_ops = &squashfs_decompressor_single; +#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI) + opts->thread_ops = &squashfs_decompressor_multi; +#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) + opts->thread_ops = &squashfs_decompressor_percpu; +#else +#error "fail: unknown squashfs decompression thread mode?" +#endif + opts->thread_num = 0; fc->fs_private = opts; fc->ops = &squashfs_context_ops; return 0; @@ -478,7 +572,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_destroy(sbi); + sbi->thread_ops->destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/super.c b/fs/super.c index 6a82660e1adb..12c08cb20405 100644 --- a/fs/super.c +++ b/fs/super.c @@ -291,6 +291,7 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); + fscrypt_destroy_keyring(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); @@ -479,7 +480,7 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); /* only nonzero refcount inodes can have marks */ fsnotify_sb_delete(sb); - fscrypt_sb_delete(sb); + fscrypt_destroy_keyring(sb); security_sb_delete(sb); if (sb->s_dio_done_wq) { @@ -1111,55 +1112,14 @@ static int test_single_super(struct super_block *s, struct fs_context *fc) return 1; } -/** - * vfs_get_super - Get a superblock with a search key set in s_fs_info. 
- * @fc: The filesystem context holding the parameters - * @keying: How to distinguish superblocks - * @fill_super: Helper to initialise a new superblock - * - * Search for a superblock and create a new one if not found. The search - * criterion is controlled by @keying. If the search fails, a new superblock - * is created and @fill_super() is called to initialise it. - * - * @keying can take one of a number of values: - * - * (1) vfs_get_single_super - Only one superblock of this type may exist on the - * system. This is typically used for special system filesystems. - * - * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have - * distinct keys (where the key is in s_fs_info). Searching for the same - * key again will turn up the superblock for that key. - * - * (3) vfs_get_independent_super - Multiple superblocks may exist and are - * unkeyed. Each call will get a new superblock. - * - * A permissions check is made by sget_fc() unless we're getting a superblock - * for a kernel-internal mount or a submount. 
- */ -int vfs_get_super(struct fs_context *fc, - enum vfs_get_super_keying keying, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)) +static int vfs_get_super(struct fs_context *fc, bool reconf, + int (*test)(struct super_block *, struct fs_context *), + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)) { - int (*test)(struct super_block *, struct fs_context *); struct super_block *sb; int err; - switch (keying) { - case vfs_get_single_super: - case vfs_get_single_reconf_super: - test = test_single_super; - break; - case vfs_get_keyed_super: - test = test_keyed_super; - break; - case vfs_get_independent_super: - test = NULL; - break; - default: - BUG(); - } - sb = sget_fc(fc, test, set_anon_super_fc); if (IS_ERR(sb)) return PTR_ERR(sb); @@ -1173,7 +1133,7 @@ int vfs_get_super(struct fs_context *fc, fc->root = dget(sb->s_root); } else { fc->root = dget(sb->s_root); - if (keying == vfs_get_single_reconf_super) { + if (reconf) { err = reconfigure_super(fc); if (err < 0) { dput(fc->root); @@ -1189,13 +1149,12 @@ error: deactivate_locked_super(sb); return err; } -EXPORT_SYMBOL(vfs_get_super); int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { - return vfs_get_super(fc, vfs_get_independent_super, fill_super); + return vfs_get_super(fc, false, NULL, fill_super); } EXPORT_SYMBOL(get_tree_nodev); @@ -1203,7 +1162,7 @@ int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { - return vfs_get_super(fc, vfs_get_single_super, fill_super); + return vfs_get_super(fc, false, test_single_super, fill_super); } EXPORT_SYMBOL(get_tree_single); @@ -1211,7 +1170,7 @@ int get_tree_single_reconf(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { - return vfs_get_super(fc, vfs_get_single_reconf_super, fill_super); + return vfs_get_super(fc, true, test_single_super, fill_super); } 
EXPORT_SYMBOL(get_tree_single_reconf); @@ -1221,7 +1180,7 @@ int get_tree_keyed(struct fs_context *fc, void *key) { fc->s_fs_info = key; - return vfs_get_super(fc, vfs_get_keyed_super, fill_super); + return vfs_get_super(fc, false, test_keyed_super, fill_super); } EXPORT_SYMBOL(get_tree_keyed); diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index d4ec9bb97de9..3b8567564e7e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -438,7 +438,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) res += blocks; direct = 1; } - return blocks; + return res; } int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path, diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 3f128b9fdfbb..9c9d3f0e36a4 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2467,7 +2467,7 @@ error_dump: static inline int chance(unsigned int n, unsigned int out_of) { - return !!(prandom_u32_max(out_of) + 1 <= n); + return !!(get_random_u32_below(out_of) + 1 <= n); } @@ -2485,13 +2485,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) if (chance(1, 2)) { d->pc_delay = 1; /* Fail within 1 minute */ - delay = prandom_u32_max(60000); + delay = get_random_u32_below(60000); d->pc_timeout = jiffies; d->pc_timeout += msecs_to_jiffies(delay); ubifs_warn(c, "failing after %lums", delay); } else { d->pc_delay = 2; - delay = prandom_u32_max(10000); + delay = get_random_u32_below(10000); /* Fail within 10000 operations */ d->pc_cnt_max = delay; ubifs_warn(c, "failing after %lu calls", delay); @@ -2571,7 +2571,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, unsigned int from, to, ffs = chance(1, 2); unsigned char *p = (void *)buf; - from = prandom_u32_max(len); + from = get_random_u32_below(len); /* Corruption span max to end of write unit */ to = min(len, ALIGN(from + 1, c->max_write_size)); diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index cfbc31f709f4..c4d079328b92 100644 --- a/fs/ubifs/lpt_commit.c +++ 
b/fs/ubifs/lpt_commit.c @@ -1970,28 +1970,28 @@ static int dbg_populate_lsave(struct ubifs_info *c) if (!dbg_is_chk_gen(c)) return 0; - if (prandom_u32_max(4)) + if (get_random_u32_below(4)) return 0; for (i = 0; i < c->lsave_cnt; i++) c->lsave[i] = c->main_first; list_for_each_entry(lprops, &c->empty_list, list) - c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; list_for_each_entry(lprops, &c->freeable_list, list) - c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; list_for_each_entry(lprops, &c->frdi_idx_list, list) - c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; heap = &c->lpt_heap[LPROPS_DIRTY - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; heap = &c->lpt_heap[LPROPS_FREE - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; return 1; } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 01362ad5f804..a55e04822d16 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -700,7 +700,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_is_chk_index(c) && !prandom_u32_max(8)) + if (dbg_is_chk_index(c) && !get_random_u32_below(8)) return -ENOSPC; return 0; } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index fb4c30e05245..ae7bc13a5298 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -240,7 +240,7 @@ static struct 
fileIdentDesc *udf_find_entry(struct inode *dir, poffset - lfi); else { if (!copy_name) { - copy_name = kmalloc(UDF_NAME_LEN, + copy_name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS); if (!copy_name) { fi = ERR_PTR(-ENOMEM); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 07c81ab3fd4d..98ac37e34e3d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1630,17 +1630,20 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { vma = prev; + mas_pause(&mas); goto next; } if (vma->vm_start < start) { ret = split_vma(mm, vma, start, 1); if (ret) break; + mas_pause(&mas); } if (vma->vm_end > end) { ret = split_vma(mm, vma, end, 0); if (ret) break; + mas_pause(&mas); } next: /* diff --git a/fs/xattr.c b/fs/xattr.c index 61107b6bbed2..df3af9fa8c77 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -80,6 +80,31 @@ xattr_resolve_name(struct inode *inode, const char **name) return ERR_PTR(-EOPNOTSUPP); } +/** + * may_write_xattr - check whether inode allows writing xattr + * @mnt_userns: User namespace of the mount the inode was found from + * @inode: the inode on which to set an xattr + * + * Check whether the inode allows writing xattrs. Specifically, we can never + * set or remove an extended attribute on a read-only filesystem or on an + * immutable / append-only inode. + * + * We also need to ensure that the inode has a mapping in the mount to + * not risk writing back invalid i_{g,u}id values. + * + * Return: On success zero is returned. On error a negative errno is returned. + */ +int may_write_xattr(struct user_namespace *mnt_userns, struct inode *inode) +{ + if (IS_IMMUTABLE(inode)) + return -EPERM; + if (IS_APPEND(inode)) + return -EPERM; + if (HAS_UNMAPPED_ID(mnt_userns, inode)) + return -EPERM; + return 0; +} + /* * Check permissions for extended attribute access. This is a bit complicated * because different namespaces have very different rules. 
@@ -88,20 +113,12 @@ static int xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, const char *name, int mask) { - /* - * We can never set or remove an extended attribute on a read-only - * filesystem or on an immutable / append-only inode. - */ if (mask & MAY_WRITE) { - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - /* - * Updating an xattr will likely cause i_uid and i_gid - * to be writen back improperly if their true value is - * unknown to the vfs. - */ - if (HAS_UNMAPPED_ID(mnt_userns, inode)) - return -EPERM; + int ret; + + ret = may_write_xattr(mnt_userns, inode); + if (ret) + return ret; } /* @@ -172,6 +189,9 @@ __vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, { const struct xattr_handler *handler; + if (is_posix_acl_xattr(name)) + return -EOPNOTSUPP; + handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); @@ -282,12 +302,6 @@ out: } EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); -static inline bool is_posix_acl_xattr(const char *name) -{ - return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || - (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); -} - int vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) @@ -399,6 +413,9 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, { const struct xattr_handler *handler; + if (is_posix_acl_xattr(name)) + return -EOPNOTSUPP; + handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); @@ -437,10 +454,7 @@ vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry, return ret; } nolsm: - error = __vfs_getxattr(dentry, inode, name, value, size); - if (error > 0 && is_posix_acl_xattr(name)) - posix_acl_getxattr_idmapped_mnt(mnt_userns, inode, value, size); - return error; + return __vfs_getxattr(dentry, inode, name, value, size); } EXPORT_SYMBOL_GPL(vfs_getxattr); @@ -471,6 
+485,9 @@ __vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode = d_inode(dentry); const struct xattr_handler *handler; + if (is_posix_acl_xattr(name)) + return -EOPNOTSUPP; + handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); @@ -580,17 +597,13 @@ int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) return error; } -static void setxattr_convert(struct user_namespace *mnt_userns, - struct dentry *d, struct xattr_ctx *ctx) -{ - if (ctx->size && is_posix_acl_xattr(ctx->kname->name)) - posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size); -} - int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct xattr_ctx *ctx) { - setxattr_convert(mnt_userns, dentry, ctx); + if (is_posix_acl_xattr(ctx->kname->name)) + return do_set_acl(mnt_userns, dentry, ctx->kname->name, + ctx->kvalue, ctx->size); + return vfs_setxattr(mnt_userns, dentry, ctx->kname->name, ctx->kvalue, ctx->size, ctx->flags); } @@ -697,10 +710,11 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d, return -ENOMEM; } - error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size); + if (is_posix_acl_xattr(ctx->kname->name)) + error = do_get_acl(mnt_userns, d, kname, ctx->kvalue, ctx->size); + else + error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size); if (error > 0) { - if (is_posix_acl_xattr(kname)) - posix_acl_fix_xattr_to_user(ctx->kvalue, error); if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) { @@ -875,6 +889,9 @@ removexattr(struct user_namespace *mnt_userns, struct dentry *d, if (error < 0) return error; + if (is_posix_acl_xattr(kname)) + return vfs_remove_acl(mnt_userns, d, kname); + return vfs_removexattr(mnt_userns, d, kname); } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 517a138faa66..191b22b9a35b 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ 
b/fs/xfs/libxfs/xfs_ag.h @@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) return true; } +static inline bool +xfs_verify_agbext( + struct xfs_perag *pag, + xfs_agblock_t agbno, + xfs_agblock_t len) +{ + if (agbno + len <= agbno) + return false; + + if (!xfs_verify_agbno(pag, agbno)) + return false; + + return xfs_verify_agbno(pag, agbno + len - 1); +} + /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6261599bb389..989cf341779b 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -263,11 +263,7 @@ xfs_alloc_get_rec( goto out_bad_rec; /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, *bno)) - goto out_bad_rec; - if (*bno > *bno + *len) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, *bno + *len - 1)) + if (!xfs_verify_agbext(pag, *bno, *len)) goto out_bad_rec; return 0; @@ -1520,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock( #ifdef DEBUG /* Randomly don't execute the first algorithm. */ - if (prandom_u32_max(2)) + if (get_random_u32_below(2)) return 0; #endif diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index d9b66306a9a7..cb9e950a911d 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; + bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR3_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(geo, leaf); @@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int( return __this_address; /* Leaves and bests don't overlap in leaf format. 
*/ - if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR3_LEAF1_MAGIC) && + if (isleaf1 && (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) return __this_address; @@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int( } if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; + if (isleaf1 && xfs_dir2_dataptr_to_db(geo, + be32_to_cpu(hdr->ents[i].address)) >= + be32_to_cpu(ltp->bestcount)) + return __this_address; } if (hdr->stale != stale) return __this_address; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b55bdfa9c8a8..371dc07233e0 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1564,20 +1564,6 @@ struct xfs_rmap_rec { #define RMAPBT_UNUSED_OFFSET_BITLEN 7 #define RMAPBT_OFFSET_BITLEN 54 -#define XFS_RMAP_ATTR_FORK (1 << 0) -#define XFS_RMAP_BMBT_BLOCK (1 << 1) -#define XFS_RMAP_UNWRITTEN (1 << 2) -#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ - XFS_RMAP_BMBT_BLOCK) -#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) -struct xfs_rmap_irec { - xfs_agblock_t rm_startblock; /* extent start block */ - xfs_extlen_t rm_blockcount; /* extent length */ - uint64_t rm_owner; /* extent owner */ - uint64_t rm_offset; /* offset within the owner */ - unsigned int rm_flags; /* state flags */ -}; - /* * Key structure * @@ -1626,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp); * on the startblock. This speeds up mount time deletion of stale * staging extents because they're all at the right side of the tree. 
*/ -#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31)) +#define XFS_REFC_COWFLAG (1U << 31) #define REFCNTBT_COWFLAG_BITLEN 1 #define REFCNTBT_AGBLOCK_BITLEN 31 @@ -1640,12 +1626,6 @@ struct xfs_refcount_key { __be32 rc_startblock; /* starting block number */ }; -struct xfs_refcount_irec { - xfs_agblock_t rc_startblock; /* starting block number */ - xfs_extlen_t rc_blockcount; /* count of free blocks */ - xfs_nlink_t rc_refcount; /* number of inodes linked here */ -}; - #define MAXREFCOUNT ((xfs_nlink_t)~0U) #define MAXREFCEXTLEN ((xfs_extlen_t)~0U) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 94db50eb706a..5118dedf9267 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc( /* randomly do sparse inode allocations */ if (xfs_has_sparseinodes(tp->t_mountp) && igeo->ialloc_min_blks < igeo->ialloc_blks) - do_sparse = prandom_u32_max(2); + do_sparse = get_random_u32_below(2); #endif /* diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index b351b9dc6561..f13e0809dc63 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -613,25 +613,49 @@ typedef struct xfs_efi_log_format { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_t efi_extents[1]; /* array of extents to free */ + xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; +static inline size_t +xfs_efi_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efi_log_format_32 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_32_t efi_extents[1]; /* array of extents to free */ + xfs_extent_32_t efi_extents[]; /* array of extents to 
free */ } __attribute__((packed)) xfs_efi_log_format_32_t; +static inline size_t +xfs_efi_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efi_log_format_64 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_64_t efi_extents[1]; /* array of extents to free */ + xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; +static inline size_t +xfs_efi_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose @@ -642,25 +666,49 @@ typedef struct xfs_efd_log_format { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_t efd_extents[1]; /* array of extents freed */ + xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; +static inline size_t +xfs_efd_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efd_log_format_32 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_32_t efd_extents[1]; /* array of extents freed */ + xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; +static inline size_t +xfs_efd_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efd_log_format_64 { uint16_t efd_type; 
/* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_64_t efd_extents[1]; /* array of extents freed */ + xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; +static inline size_t +xfs_efd_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * RUI/RUD (reverse mapping) log format definitions */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 64b910caafaa..3f34bafe18dd 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, int xfs_refcount_lookup_le( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } @@ -63,13 +66,16 @@ xfs_refcount_lookup_le( int xfs_refcount_lookup_ge( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } @@ -80,13 +86,16 @@ xfs_refcount_lookup_ge( int xfs_refcount_lookup_eq( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - 
trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } @@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { - irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); + uint32_t start; + + start = be32_to_cpu(rec->refc.rc_startblock); + if (start & XFS_REFC_COWFLAG) { + start &= ~XFS_REFC_COWFLAG; + irec->rc_domain = XFS_REFC_DOMAIN_COW; + } else { + irec->rc_domain = XFS_REFC_DOMAIN_SHARED; + } + + irec->rc_startblock = start; irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } @@ -114,7 +133,6 @@ xfs_refcount_get_rec( struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; - xfs_agblock_t realstart; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) @@ -124,22 +142,11 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging state */ - realstart = irec->rc_startblock; - if (realstart & XFS_REFC_COW_START) { - if (irec->rc_refcount != 1) - goto out_bad_rec; - realstart &= ~XFS_REFC_COW_START; - } else if (irec->rc_refcount < 2) { + if (!xfs_refcount_check_domain(irec)) goto out_bad_rec; - } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, realstart)) - goto out_bad_rec; - if (realstart > realstart + irec->rc_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) @@ -169,12 +176,17 @@ 
xfs_refcount_update( struct xfs_refcount_irec *irec) { union xfs_btree_rec rec; + uint32_t start; int error; trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); - rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec.refc.rc_startblock = cpu_to_be32(start); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); + error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, @@ -196,9 +208,12 @@ xfs_refcount_insert( int error; trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; + cur->bc_rec.rc.rc_domain = irec->rc_domain; + error = xfs_btree_insert(cur, i); if (error) goto out_error; @@ -244,7 +259,8 @@ xfs_refcount_delete( } if (error) goto out_error; - error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); + error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, + &found_rec); out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, @@ -343,6 +359,7 @@ xfs_refc_next( STATIC int xfs_refcount_split_extent( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t agbno, bool *shape_changed) { @@ -351,7 +368,7 @@ xfs_refcount_split_extent( int error; *shape_changed = false; - error = xfs_refcount_lookup_le(cur, agbno, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); if (error) goto out_error; if (!found_rec) @@ -364,6 +381,8 @@ xfs_refcount_split_extent( error = -EFSCORRUPTED; goto out_error; } + if (rcext.rc_domain != domain) + return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -415,6 +434,9 @@ xfs_refcount_merge_center_extents( 
trace_xfs_refcount_merge_center_extents(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, center, right); + ASSERT(left->rc_domain == center->rc_domain); + ASSERT(right->rc_domain == center->rc_domain); + /* * Make sure the center and right extents are not in the btree. * If the center extent was synthesized, the first delete call @@ -423,8 +445,8 @@ xfs_refcount_merge_center_extents( * call removes the center and the second one removes the right * extent. */ - error = xfs_refcount_lookup_ge(cur, center->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_ge(cur, center->rc_domain, + center->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -451,8 +473,8 @@ xfs_refcount_merge_center_extents( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -491,10 +513,12 @@ xfs_refcount_merge_left_extent( trace_xfs_refcount_merge_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft); + ASSERT(left->rc_domain == cleft->rc_domain); + /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cleft->rc_domain, + cleft->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -512,8 +536,8 @@ xfs_refcount_merge_left_extent( } /* Enlarge the left extent. 
*/ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -552,13 +576,15 @@ xfs_refcount_merge_right_extent( trace_xfs_refcount_merge_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right); + ASSERT(right->rc_domain == cright->rc_domain); + /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, * remove it. */ if (cright->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cright->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cright->rc_domain, + cright->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -576,8 +602,8 @@ xfs_refcount_merge_right_extent( } /* Enlarge the right extent. */ - error = xfs_refcount_lookup_le(cur, right->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, right->rc_domain, + right->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -600,8 +626,6 @@ out_error: return error; } -#define XFS_FIND_RCEXT_SHARED 1 -#define XFS_FIND_RCEXT_COW 2 /* * Find the left extent and the one after it (cleft). This function assumes * that we've already split any extent crossing agbno. 
@@ -611,16 +635,16 @@ xfs_refcount_find_left_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) goto out_error; if (!found_rec) @@ -634,11 +658,9 @@ xfs_refcount_find_left_extents( goto out_error; } - if (xfs_refc_next(&tmp) != agbno) - return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (tmp.rc_domain != domain) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (xfs_refc_next(&tmp) != agbno) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -655,6 +677,9 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; @@ -671,8 +696,10 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = min(aglen, tmp.rc_startblock - agbno); cleft->rc_refcount = 1; + cleft->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. 
@@ -680,6 +707,7 @@ xfs_refcount_find_left_extents( cleft->rc_startblock = agbno; cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; + cleft->rc_domain = domain; } trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft, agbno); @@ -700,16 +728,16 @@ xfs_refcount_find_right_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); + error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) goto out_error; if (!found_rec) @@ -723,11 +751,9 @@ xfs_refcount_find_right_extents( goto out_error; } - if (tmp.rc_startblock != agbno + aglen) - return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (tmp.rc_domain != domain) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (tmp.rc_startblock != agbno + aglen) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -744,6 +770,9 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; @@ -760,8 +789,10 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = right->rc_startblock - cright->rc_startblock; cright->rc_refcount = 1; + cright->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. 
@@ -769,6 +800,7 @@ xfs_refcount_find_right_extents( cright->rc_startblock = agbno; cright->rc_blockcount = aglen; cright->rc_refcount = 1; + cright->rc_domain = domain; } trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right, agbno + aglen); @@ -794,10 +826,10 @@ xfs_refc_valid( STATIC int xfs_refcount_merge_extents( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adjust, - int flags, bool *shape_changed) { struct xfs_refcount_irec left = {0}, cleft = {0}; @@ -812,12 +844,12 @@ xfs_refcount_merge_extents( * just below (agbno + aglen) [cright], and just above (agbno + aglen) * [right]. */ - error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, - *aglen, flags); + error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, + *agbno, *aglen); if (error) return error; - error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, - *aglen, flags); + error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, + *agbno, *aglen); if (error) return error; @@ -870,7 +902,7 @@ xfs_refcount_merge_extents( aglen); } - return error; + return 0; } /* @@ -933,7 +965,8 @@ xfs_refcount_adjust_extents( if (*aglen == 0) return 0; - error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, + &found_rec); if (error) goto out_error; @@ -941,10 +974,11 @@ xfs_refcount_adjust_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; - if (!found_rec) { + if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_SHARED; } /* @@ -957,6 +991,8 @@ xfs_refcount_adjust_extents( tmp.rc_blockcount = min(*aglen, ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; + tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; + 
trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -986,15 +1022,30 @@ xfs_refcount_adjust_extents( (*agbno) += tmp.rc_blockcount; (*aglen) -= tmp.rc_blockcount; - error = xfs_refcount_lookup_ge(cur, *agbno, + /* Stop if there's nothing left to modify */ + if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) + break; + + /* Move the cursor to the start of ext. */ + error = xfs_refcount_lookup_ge(cur, + XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; } - /* Stop if there's nothing left to modify */ - if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) - break; + /* + * A previous step trimmed agbno/aglen such that the end of the + * range would not be in the middle of the record. If this is + * no longer the case, something is seriously wrong with the + * btree. Make sure we never feed the synthesized record into + * the processing loop below. + */ + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || + XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { + error = -EFSCORRUPTED; + goto out_error; + } /* * Adjust the reference count and either update the tree @@ -1070,13 +1121,15 @@ xfs_refcount_adjust( /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno + aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1085,8 +1138,8 @@ xfs_refcount_adjust( /* * Try to merge with the left or right extents of the range. 
*/ - error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, - XFS_FIND_RCEXT_SHARED, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, + new_agbno, new_aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1125,6 +1178,32 @@ xfs_refcount_finish_one_cleanup( } /* + * Set up a continuation a deferred refcount operation by updating the intent. + * Checks to make sure we're not going to run off the end of the AG. + */ +static inline int +xfs_refcount_continue_op( + struct xfs_btree_cur *cur, + xfs_fsblock_t startblock, + xfs_agblock_t new_agbno, + xfs_extlen_t new_len, + xfs_fsblock_t *new_fsbno) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; + + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + return -EFSCORRUPTED; + + *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + + ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + + return 0; +} + +/* * Process one of the deferred refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. 
* This saves time and eliminates a buffer deadlock between the @@ -1191,12 +1270,20 @@ xfs_refcount_finish_one( case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_INCREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_DECREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; @@ -1307,7 +1394,8 @@ xfs_refcount_find_shared( *flen = 0; /* Try to find a refcount extent that crosses the start */ - error = xfs_refcount_lookup_le(cur, agbno, &have); + error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, + &have); if (error) goto out_error; if (!have) { @@ -1325,6 +1413,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1340,6 +1430,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; } /* If the extent starts after the range we want, bail out */ @@ -1371,7 +1463,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } - if (tmp.rc_startblock >= agbno + aglen || + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || + tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); @@ -1455,17 +1548,23 @@ xfs_refcount_adjust_cow_extents( 
return 0; /* Find any overlapping refcount records */ - error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, + &found_rec); if (error) goto out_error; error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && + ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + error = -EFSCORRUPTED; + goto out_error; + } if (!found_rec) { - ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + - XFS_REFC_COW_START; + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_COW; } switch (adj) { @@ -1480,6 +1579,8 @@ xfs_refcount_adjust_cow_extents( tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; + tmp.rc_domain = XFS_REFC_DOMAIN_COW; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -1542,24 +1643,24 @@ xfs_refcount_adjust_cow( bool shape_changed; int error; - agbno += XFS_REFC_COW_START; - /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno, &shape_changed); if (error) goto out_error; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno + aglen, &shape_changed); if (error) goto out_error; /* * Try to merge with the left or right extents of the range. 
*/ - error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, - XFS_FIND_RCEXT_COW, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, + &aglen, adj, &shape_changed); if (error) goto out_error; @@ -1666,10 +1767,18 @@ xfs_refcount_recover_extent( be32_to_cpu(rec->refc.rc_refcount) != 1)) return -EFSCORRUPTED; - rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + rr = kmalloc(sizeof(struct xfs_refcount_recovery), + GFP_KERNEL | __GFP_NOFAIL); + INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - list_add_tail(&rr->rr_list, debris); + if (XFS_IS_CORRUPT(cur->bc_mp, + rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + kfree(rr); + return -EFSCORRUPTED; + } + + list_add_tail(&rr->rr_list, debris); return 0; } @@ -1687,10 +1796,11 @@ xfs_refcount_recover_cow_leftovers( union xfs_btree_irec low; union xfs_btree_irec high; xfs_fsblock_t fsb; - xfs_agblock_t agbno; int error; - if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) + /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); + if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; INIT_LIST_HEAD(&debris); @@ -1717,7 +1827,7 @@ xfs_refcount_recover_cow_leftovers( /* Find all the leftover CoW staging extents. 
*/ memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); - low.rc.rc_startblock = XFS_REFC_COW_START; + low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; high.rc.rc_startblock = -1U; error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); @@ -1738,8 +1848,8 @@ xfs_refcount_recover_cow_leftovers( &rr->rr_rrec); /* Free the orphan record */ - agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; - fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); + fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, + rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); @@ -1751,7 +1861,7 @@ xfs_refcount_recover_cow_leftovers( goto out_free; list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; @@ -1761,7 +1871,7 @@ out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; } @@ -1770,6 +1880,7 @@ out_free: int xfs_refcount_has_record( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, bool *exists) @@ -1781,6 +1892,7 @@ xfs_refcount_has_record( low.rc.rc_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.rc.rc_startblock = bno + len - 1; + low.rc.rc_domain = high.rc.rc_domain = domain; return xfs_btree_has_record(cur, &low, &high, exists); } diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index e8b322de7f3d..452f30556f5a 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -14,14 +14,33 @@ struct xfs_bmbt_irec; struct xfs_refcount_irec; extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_eq(struct 
xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +static inline uint32_t +xfs_refcount_encode_startblock( + xfs_agblock_t startblock, + enum xfs_refc_domain domain) +{ + uint32_t start; + + /* + * low level btree operations need to handle the generic btree range + * query functions (which set rc_domain == -1U), so we check that the + * domain is /not/ shared. + */ + start = startblock & ~XFS_REFC_COWFLAG; + if (domain != XFS_REFC_DOMAIN_SHARED) + start |= XFS_REFC_COWFLAG; + + return start; +} + enum xfs_refcount_intent_type { XFS_REFCOUNT_INCREASE = 1, XFS_REFCOUNT_DECREASE, @@ -36,6 +55,18 @@ struct xfs_refcount_intent { xfs_fsblock_t ri_startblock; }; +/* Check that the refcount is appropriate for the record domain. */ +static inline bool +xfs_refcount_check_domain( + const struct xfs_refcount_irec *irec) +{ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + return false; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + return false; + return true; +} + void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, @@ -79,7 +110,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, #define XFS_REFCOUNT_ITEM_OVERHEAD 32 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, - xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + enum xfs_refc_domain domain, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 316c1ec0c3c2..e1f789866683 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -13,6 
+13,7 @@ #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" +#include "xfs_refcount.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock); + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec->refc.rc_startblock = cpu_to_be32(start); rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); } @@ -182,10 +188,13 @@ xfs_refcountbt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - struct xfs_refcount_irec *rec = &cur->bc_rec.rc; const struct xfs_refcount_key *kp = &key->refc; + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; - return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + return (int64_t)be32_to_cpu(kp->rc_startblock) - start; } STATIC int64_t diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 094dfc897ebc..b56aca1e7c66 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -235,13 +235,8 @@ xfs_rmap_get_rec( goto out_bad_rec; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, irec->rm_startblock)) - goto out_bad_rec; - if (irec->rm_startblock > - irec->rm_startblock + irec->rm_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, - irec->rm_startblock + irec->rm_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rm_startblock, + irec->rm_blockcount)) goto out_bad_rec; } diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 2c4ad6e4bb14..5b2f27cbdb80 100644 --- 
a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize( /* * In renaming a files we can modify: - * the four inodes involved: 4 * inode size + * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets @@ -437,7 +437,7 @@ xfs_calc_rename_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 4) + + max((xfs_calc_inode_res(mp, 5) + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index a6b7d98cf68f..5ebdda7e1078 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,6 +166,36 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +enum xfs_refc_domain { + XFS_REFC_DOMAIN_SHARED = 0, + XFS_REFC_DOMAIN_COW, +}; + +#define XFS_REFC_DOMAIN_STRINGS \ + { XFS_REFC_DOMAIN_SHARED, "shared" }, \ + { XFS_REFC_DOMAIN_COW, "cow" } + +struct xfs_refcount_irec { + xfs_agblock_t rc_startblock; /* starting block number */ + xfs_extlen_t rc_blockcount; /* count of free blocks */ + xfs_nlink_t rc_refcount; /* number of inodes linked here */ + enum xfs_refc_domain rc_domain; /* shared or cow staging extent? 
*/ +}; + +#define XFS_RMAP_ATTR_FORK (1 << 0) +#define XFS_RMAP_BMBT_BLOCK (1 << 1) +#define XFS_RMAP_UNWRITTEN (1 << 2) +#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ + XFS_RMAP_BMBT_BLOCK) +#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) +struct xfs_rmap_irec { + xfs_agblock_t rm_startblock; /* extent start block */ + xfs_extlen_t rm_blockcount; /* extent length */ + uint64_t rm_owner; /* extent owner */ + uint64_t rm_offset; /* offset within the owner */ + unsigned int rm_flags; /* state flags */ +}; + /* per-AG block reservation types */ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index ab427b4d7fe0..3b38f4e2a537 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -100,9 +100,7 @@ xchk_allocbt_rec( bno = be32_to_cpu(rec->alloc.ar_startblock); len = be32_to_cpu(rec->alloc.ar_blockcount); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_allocbt_xref(bs->sc, bno, len); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index e1026e07bf94..e312be7cd375 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -108,9 +108,8 @@ xchk_iallocbt_chunk( xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index c68b767dc08f..a26ee0f24ef2 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -269,15 +269,13 @@ done: STATIC void xchk_refcountbt_xref_rmap( struct xfs_scrub *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_nlink_t refcount) + const struct xfs_refcount_irec *irec) { struct xchk_refcnt_check refchk = { - .sc = 
sc, - .bno = bno, - .len = len, - .refcount = refcount, + .sc = sc, + .bno = irec->rc_startblock, + .len = irec->rc_blockcount, + .refcount = irec->rc_refcount, .seen = 0, }; struct xfs_rmap_irec low; @@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap( /* Cross-reference with the rmapbt to confirm the refcount. */ memset(&low, 0, sizeof(low)); - low.rm_startblock = bno; + low.rm_startblock = irec->rc_startblock; memset(&high, 0xFF, sizeof(high)); - high.rm_startblock = bno + len - 1; + high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1; INIT_LIST_HEAD(&refchk.fragments); error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, @@ -302,7 +300,7 @@ xchk_refcountbt_xref_rmap( goto out_free; xchk_refcountbt_process_rmap_fragments(&refchk); - if (refcount != refchk.seen) + if (irec->rc_refcount != refchk.seen) xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); out_free: @@ -315,17 +313,16 @@ out_free: /* Cross-reference with the other btrees. */ STATIC void xchk_refcountbt_xref( - struct xfs_scrub *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - xfs_nlink_t refcount) + struct xfs_scrub *sc, + const struct xfs_refcount_irec *irec) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xchk_xref_is_used_space(sc, agbno, len); - xchk_xref_is_not_inode_chunk(sc, agbno, len); - xchk_refcountbt_xref_rmap(sc, agbno, len, refcount); + xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount); + xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock, + irec->rc_blockcount); + xchk_refcountbt_xref_rmap(sc, irec); } /* Scrub a refcountbt record. 
*/ @@ -334,35 +331,27 @@ xchk_refcountbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { + struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; - bool has_cowflag; - bno = be32_to_cpu(rec->refc.rc_startblock); - len = be32_to_cpu(rec->refc.rc_blockcount); - refcount = be32_to_cpu(rec->refc.rc_refcount); + xfs_refcount_btrec_to_irec(rec, &irec); - /* Only CoW records can have refcount == 1. */ - has_cowflag = (bno & XFS_REFC_COW_START); - if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) + /* Check the domain and refcount are not incompatible. */ + if (!xfs_refcount_check_domain(&irec)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (has_cowflag) - (*cow_blocks) += len; + + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) + (*cow_blocks) += irec.rc_blockcount; /* Check the extent. */ - bno &= ~XFS_REFC_COW_START; - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (refcount == 0) + if (irec.rc_refcount == 0) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xchk_refcountbt_xref(bs->sc, bno, len, refcount); + xchk_refcountbt_xref(bs->sc, &irec); return 0; } @@ -426,7 +415,6 @@ xchk_xref_is_cow_staging( xfs_extlen_t len) { struct xfs_refcount_irec rc; - bool has_cowflag; int has_refcount; int error; @@ -434,8 +422,8 @@ xchk_xref_is_cow_staging( return; /* Find the CoW staging extent. 
*/ - error = xfs_refcount_lookup_le(sc->sa.refc_cur, - agbno + XFS_REFC_COW_START, &has_refcount); + error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, + agbno, &has_refcount); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { @@ -451,9 +439,8 @@ xchk_xref_is_cow_staging( return; } - /* CoW flag must be set, refcount must be 1. */ - has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); - if (!has_cowflag || rc.rc_refcount != 1) + /* CoW lookup returned a shared extent record? */ + if (rc.rc_domain != XFS_REFC_DOMAIN_COW) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ @@ -477,7 +464,8 @@ xchk_xref_is_not_shared( if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; - error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED, + agbno, len, &shared); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (shared) diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b744c62052b6..a05f44eb8178 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -242,12 +242,13 @@ xfs_acl_set_mode( } int -xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { umode_t mode; bool set_mode = false; int error = 0; + struct inode *inode = d_inode(dentry); if (!acl) goto set_acl; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 263404d0bfda..dcd176149c7a 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -11,7 +11,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); -extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int 
__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index cf5ce607dc05..2788a6f2edcd 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -245,28 +245,6 @@ xfs_attri_init( return attrip; } -/* - * Copy an attr format buffer from the given buf, and into the destination attr - * format structure. - */ -STATIC int -xfs_attri_copy_format( - struct xfs_log_iovec *buf, - struct xfs_attri_log_format *dst_attr_fmt) -{ - struct xfs_attri_log_format *src_attr_fmt = buf->i_addr; - size_t len; - - len = sizeof(struct xfs_attri_log_format); - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len); - return 0; -} - static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attrd_log_item, attrd_item); @@ -731,24 +709,50 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_value = NULL; const void *attr_name; - int error; + size_t len; attri_formatp = item->ri_buf[0].i_addr; attr_name = item->ri_buf[1].i_addr; /* Validate xfs_attri_log_format before the large memory allocation */ + len = sizeof(struct xfs_attri_log_format); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + if (!xfs_attri_validate(mp, attri_formatp)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + /* Validate the attr name */ + if (item->ri_buf[1].i_len != + xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, 
item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[1].i_addr, item->ri_buf[1].i_len); return -EFSCORRUPTED; } - if (attri_formatp->alfi_value_len) + /* Validate the attr value, if present */ + if (attri_formatp->alfi_value_len != 0) { + if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, + item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + attr_value = item->ri_buf[2].i_addr; + } /* * Memory alloc failure will cause replay to abort. We attach the @@ -760,9 +764,7 @@ xlog_recover_attri_commit_pass2( attri_formatp->alfi_value_len); attrip = xfs_attri_init(mp, nv); - error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format); - if (error) - goto out; + memcpy(&attrip->attri_format, attri_formatp, len); /* * The ATTRI has two references. 
One for the ATTRD and one for ATTRI to @@ -774,10 +776,6 @@ xlog_recover_attri_commit_pass2( xfs_attri_release(attrip); xfs_attri_log_nameval_put(nv); return 0; -out: - xfs_attri_item_free(attrip); - xfs_attri_log_nameval_put(nv); - return error; } /* @@ -842,7 +840,8 @@ xlog_recover_attrd_commit_pass2( attrd_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 51f66e982484..41323da523d1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_relog = xfs_bui_item_relog, }; -/* - * Copy an BUI format buffer from the given buf, and into the destination - * BUI format structure. The BUI/BUD items were designed not to need any - * special alignment handling. 
- */ -static int +static inline void xfs_bui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_bui_log_format *dst_bui_fmt) + struct xfs_bui_log_format *dst, + const struct xfs_bui_log_format *src) { - struct xfs_bui_log_format *src_bui_fmt; - uint len; + unsigned int i; - src_bui_fmt = buf->i_addr; - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents)); - if (buf->i_len == len) { - memcpy(dst_bui_fmt, src_bui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->bui_nextents; i++) + memcpy(&dst->bui_extents[i], &src->bui_extents[i], + sizeof(struct xfs_map_extent)); } /* @@ -646,23 +636,34 @@ xlog_recover_bui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_bui_log_item *buip; struct xfs_bui_log_format *bui_formatp; + size_t len; bui_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } - buip = xfs_bui_init(mp); - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); - if (error) { - xfs_bui_item_free(buip); - return error; + + len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + buip = xfs_bui_init(mp); + xfs_bui_copy_format(&buip->bui_format, bui_formatp); atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); /* * Insert 
the intent into the AIL directly and drop one reference so @@ -696,7 +697,8 @@ xlog_recover_bud_commit_pass2( bud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 7db588ed0be5..822e6a0e9d1a 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -234,13 +234,18 @@ int xfs_errortag_init( struct xfs_mount *mp) { + int ret; + mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, KM_MAYFAIL); if (!mp->m_errortag) return -ENOMEM; - return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, - &mp->m_kobj, "errortag"); + ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + &mp->m_kobj, "errortag"); + if (ret) + kmem_free(mp->m_errortag); + return ret; } void @@ -274,7 +279,7 @@ xfs_errortag_test( ASSERT(error_tag < XFS_ERRTAG_MAX); randfactor = mp->m_errortag[error_tag]; - if (!randfactor || prandom_u32_max(randfactor)) + if (!randfactor || get_random_u32_below(randfactor)) return false; xfs_warn_ratelimited(mp, diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 27ccfcd82f04..d5130d1fcfae 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -66,27 +66,16 @@ xfs_efi_release( xfs_efi_item_free(efip); } -/* - * This returns the number of iovecs needed to log the given efi item. - * We only need 1 iovec for an efi item. It just logs the efi_log_format - * structure. 
- */ -static inline int -xfs_efi_item_sizeof( - struct xfs_efi_log_item *efip) -{ - return sizeof(struct xfs_efi_log_format) + - (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); -} - STATIC void xfs_efi_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip)); + *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents); } /* @@ -112,7 +101,7 @@ xfs_efi_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, - xfs_efi_item_sizeof(efip)); + xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -155,13 +144,11 @@ xfs_efi_init( { struct xfs_efi_log_item *efip; - uint size; ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(struct xfs_efi_log_item) + - ((nextents - 1) * sizeof(xfs_extent_t))); - efip = kmem_zalloc(size, 0); + efip = kzalloc(xfs_efi_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -188,15 +175,17 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) { xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; - uint len = sizeof(xfs_efi_log_format_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); - uint len32 = sizeof(xfs_efi_log_format_32_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); - uint len64 = sizeof(xfs_efi_log_format_64_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); + uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); + uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); + uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); if (buf->i_len == len) { - memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); + memcpy(dst_efi_fmt, src_efi_fmt, + offsetof(struct xfs_efi_log_format, efi_extents)); + for (i = 0; i < 
src_efi_fmt->efi_nextents; i++) + memcpy(&dst_efi_fmt->efi_extents[i], + &src_efi_fmt->efi_extents[i], + sizeof(struct xfs_extent)); return 0; } else if (buf->i_len == len32) { xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; @@ -227,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr, + buf->i_len); return -EFSCORRUPTED; } @@ -246,27 +236,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) kmem_cache_free(xfs_efd_cache, efdp); } -/* - * This returns the number of iovecs needed to log the given efd item. - * We only need 1 iovec for an efd item. It just logs the efd_log_format - * structure. - */ -static inline int -xfs_efd_item_sizeof( - struct xfs_efd_log_item *efdp) -{ - return sizeof(xfs_efd_log_format_t) + - (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); -} - STATIC void xfs_efd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip)); + *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents); } /* @@ -291,7 +270,7 @@ xfs_efd_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, - xfs_efd_item_sizeof(efdp)); + xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } /* @@ -340,9 +319,8 @@ xfs_trans_get_efd( ASSERT(nextents > 0); if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - (nextents - 1) * sizeof(struct xfs_extent), - 0); + efdp = kzalloc(xfs_efd_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -733,6 +711,12 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { + 
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + efip = xfs_efi_init(mp, efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { @@ -769,12 +753,24 @@ xlog_recover_efd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; + int buflen = item->ri_buf[0].i_len; efd_formatp = item->ri_buf[0].i_addr; - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); + + if (buflen < sizeof(struct xfs_efd_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } + + if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) && + item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 186d0f2137f1..da6a5afa607c 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -52,6 +52,14 @@ struct xfs_efi_log_item { xfs_efi_log_format_t efi_format; }; +static inline size_t +xfs_efi_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efi_log_item, efi_format) + + xfs_efi_log_format_sizeof(nr); +} + /* * This is the "extent free done" log item. 
It is used to log * the fact that some extents earlier mentioned in an efi item @@ -64,6 +72,14 @@ struct xfs_efd_log_item { xfs_efd_log_format_t efd_format; }; +static inline size_t +xfs_efd_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efd_log_item, efd_format) + + xfs_efd_log_format_sizeof(nr); +} + /* * Max number of extents in fast allocation path. */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c6c80265c0b2..e462d39c840e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1261,7 +1261,7 @@ xfs_file_llseek( } #ifdef CONFIG_FS_DAX -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, @@ -1274,14 +1274,15 @@ xfs_dax_fault( &xfs_read_iomap_ops); } #else -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, bool write_fault, pfn_t *pfn) { - return 0; + ASSERT(0); + return VM_FAULT_SIGBUS; } #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c000b74dd203..aa303be11576 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2818,7 +2818,7 @@ retry: * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source - * directory, we can lock from 2 to 4 inodes. + * directory, we can lock from 2 to 5 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2e10e1c66ad6..712238305bc3 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -651,6 +651,7 @@ xfs_vn_change_ok( static int xfs_setattr_nonsize( struct user_namespace *mnt_userns, + struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) { @@ -757,7 +758,7 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. 
*/ if (mask & ATTR_MODE) { - error = posix_acl_chmod(mnt_userns, inode, inode->i_mode); + error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); if (error) return error; } @@ -779,6 +780,7 @@ out_dqrele: STATIC int xfs_setattr_size( struct user_namespace *mnt_userns, + struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) { @@ -810,7 +812,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(mnt_userns, ip, iattr); + return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); } /* @@ -987,7 +989,7 @@ xfs_vn_setattr_size( error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (error) return error; - return xfs_setattr_size(mnt_userns, ip, iattr); + return xfs_setattr_size(mnt_userns, dentry, ip, iattr); } STATIC int @@ -1019,7 +1021,7 @@ xfs_vn_setattr( error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (!error) - error = xfs_setattr_nonsize(mnt_userns, ip, iattr); + error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); } return error; @@ -1101,7 +1103,7 @@ xfs_vn_tmpfile( } static const struct inode_operations xfs_inode_operations = { - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -1128,7 +1130,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -1155,7 +1157,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 17e923b9c5fa..322eb2ee6c55 
100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2552,6 +2552,8 @@ xlog_recover_process_intents( for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; lip = xfs_trans_ail_cursor_next(ailp, &cur)) { + const struct xfs_item_ops *ops; + if (!xlog_item_is_intent(lip)) break; @@ -2567,13 +2569,17 @@ xlog_recover_process_intents( * deferred ops, you /must/ attach them to the capture list in * the recover routine or else those subsequent intents will be * replayed in the wrong order! + * + * The recovery function can free the log item, so we must not + * access lip after it returns. */ spin_unlock(&ailp->ail_lock); - error = lip->li_ops->iop_recover(lip, &capture_list); + ops = lip->li_ops; + error = ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); if (error) { trace_xlog_intent_recovery_failed(log->l_mp, error, - lip->li_ops->iop_recover); + ops->iop_recover); break; } } diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 758702b9495f..9737b5a9f405 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void) /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); @@ -134,6 +134,21 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); 
XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7e97bf19793d..858e3e9eb4a8 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -523,7 +523,9 @@ xfs_cui_item_recover( type = refc_type; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -536,7 +538,8 @@ xfs_cui_item_recover( &new_fsb, &new_len, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - refc, sizeof(*refc)); + &cuip->cui_format, + sizeof(cuip->cui_format)); if (error) goto abort_error; @@ -622,28 +625,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_relog = xfs_cui_item_relog, }; -/* - * Copy an CUI format buffer from the given buf, and into the destination - * CUI format structure. 
The CUI/CUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_cui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_cui_log_format *dst_cui_fmt) + struct xfs_cui_log_format *dst, + const struct xfs_cui_log_format *src) { - struct xfs_cui_log_format *src_cui_fmt; - uint len; + unsigned int i; - src_cui_fmt = buf->i_addr; - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); - if (buf->i_len == len) { - memcpy(dst_cui_fmt, src_cui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->cui_nextents; i++) + memcpy(&dst->cui_extents[i], &src->cui_extents[i], + sizeof(struct xfs_phys_extent)); } /* @@ -660,19 +653,28 @@ xlog_recover_cui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_cui_log_item *cuip; struct xfs_cui_log_format *cui_formatp; + size_t len; cui_formatp = item->ri_buf[0].i_addr; - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); - if (error) { - xfs_cui_item_free(cuip); - return error; + if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + xfs_cui_copy_format(&cuip->cui_format, cui_formatp); atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); /* * Insert the intent into the AIL directly and drop one reference so @@ -706,7 +708,8 @@ xlog_recover_cud_commit_pass2( 
cud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index fef92e02f3bb..534504ede1a3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -155,31 +155,6 @@ xfs_rui_init( return ruip; } -/* - * Copy an RUI format buffer from the given buf, and into the destination - * RUI format structure. The RUI/RUD items were designed not to need any - * special alignment handling. - */ -STATIC int -xfs_rui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_rui_log_format *dst_rui_fmt) -{ - struct xfs_rui_log_format *src_rui_fmt; - uint len; - - src_rui_fmt = buf->i_addr; - len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); - - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy(dst_rui_fmt, src_rui_fmt, len); - return 0; -} - static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_rud_log_item, rud_item); @@ -582,7 +557,9 @@ xfs_rui_item_recover( type = XFS_RMAP_FREE; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -652,6 +629,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_relog = xfs_rui_item_relog, }; +static inline void +xfs_rui_copy_format( + struct xfs_rui_log_format *dst, + const struct xfs_rui_log_format *src) +{ + unsigned int i; + + memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents)); + + for (i = 0; i < src->rui_nextents; i++) + memcpy(&dst->rui_extents[i], &src->rui_extents[i], + sizeof(struct xfs_map_extent)); +} + 
/* * This routine is called to create an in-core extent rmap update * item from the rui format structure which was logged on disk. @@ -666,19 +657,28 @@ xlog_recover_rui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_rui_log_item *ruip; struct xfs_rui_log_format *rui_formatp; + size_t len; rui_formatp = item->ri_buf[0].i_addr; - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); - if (error) { - xfs_rui_item_free(ruip); - return error; + if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); + xfs_rui_copy_format(&ruip->rui_format, rui_formatp); atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); /* * Insert the intent into the AIL directly and drop one reference so @@ -711,7 +711,11 @@ xlog_recover_rud_commit_pass2( struct xfs_rud_log_format *rud_formatp; rud_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); + if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + rud_formatp, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id); return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f029c6702dda..ee4b429a2f2c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2028,18 +2028,14 @@ xfs_init_caches(void) goto out_destroy_trans_cache; xfs_efd_cache = 
kmem_cache_create("xfs_efd_item", - (sizeof(struct xfs_efd_log_item) + - (XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efd_cache) goto out_destroy_buf_item_cache; xfs_efi_cache = kmem_cache_create("xfs_efi_item", - (sizeof(struct xfs_efi_log_item) + - (XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efi_cache) goto out_destroy_efd_cache; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 43585850f154..513095e353a5 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -33,10 +33,15 @@ xfs_sysfs_init( const char *name) { struct kobject *parent; + int err; parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + if (err) + kobject_put(&kobj->kobject); + + return err; } static inline void diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cb7c81ba7fa3..372d871bccc5 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE); TRACE_DEFINE_ENUM(PE_SIZE_PMD); TRACE_DEFINE_ENUM(PE_SIZE_PUD); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); + TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, bool write_fault), @@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = 
irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount) @@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount, @@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_fast_assign( 
__entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount) @@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount 
%u -- " - "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, @@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) + __field(enum xfs_refc_domain, i3_domain) __field(xfs_agblock_t, i3_startblock) __field(xfs_extlen_t, i3_blockcount) __field(xfs_nlink_t, i3_refcount) @@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; + __entry->i3_domain = i3->rc_domain; __entry->i3_startblock = i3->rc_startblock; __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 
0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, + __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i3_startblock, __entry->i3_blockcount, __entry->i3_refcount) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 16fbf2a1144c..f51df7d94ef7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -730,11 +730,10 @@ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { - struct xfs_log_item *lip; DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); - while ((lip = xfs_ail_max(ailp)) != NULL) { + while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 860f0b1032c6..2c53fbb8d918 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode) return; /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ + if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ @@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode) return; } +out: /* The zone is not active. 
If it was, update the active count */ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; @@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* @@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->cond = BLK_ZONE_COND_OFFLINE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; } + zi->i_flags |= ZONEFS_ZONE_READONLY; inode->i_mode &= ~0222; return i_size_read(inode); case BLK_ZONE_COND_FULL: @@ -478,8 +489,7 @@ static void __zonefs_io_error(struct inode *inode, bool write) struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = - zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + unsigned int nr_zones = 1; struct zonefs_ioerr_data err = { .inode = inode, .write = write, @@ -487,6 +497,15 @@ static void __zonefs_io_error(struct inode *inode, bool write) int ret; /* + * The only files that have more than one zone are conventional zone + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. + */ + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) + nr_zones = zi->i_zone_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + + /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as * struct request allocations for the same device. 
The former case may @@ -1407,6 +1426,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_ztype = type; zi->i_zsector = zone->start; zi->i_zone_size = zone->len << SECTOR_SHIFT; + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && + !(sbi->s_features & ZONEFS_F_AGGRCNV)) { + zonefs_err(sb, + "zone size %llu doesn't match device's zone sectors %llu\n", + zi->i_zone_size, + bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); + return -EINVAL; + } zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); @@ -1456,11 +1483,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; - int ret; + int ret = -ENOMEM; dentry = d_alloc_name(parent, name); if (!dentry) - return NULL; + return ERR_PTR(ret); inode = new_inode(parent->d_sb); if (!inode) @@ -1485,7 +1512,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, dput: dput(dentry); - return NULL; + return ERR_PTR(ret); } struct zonefs_zone_data { @@ -1505,7 +1532,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, struct blk_zone *zone, *next, *end; const char *zgroup_name; char *file_name; - struct dentry *dir; + struct dentry *dir, *dent; unsigned int n = 0; int ret; @@ -1523,8 +1550,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, zgroup_name = "seq"; dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); - if (!dir) { - ret = -ENOMEM; + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); goto free; } @@ -1570,8 +1597,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, * Use the file number within its group as file name. 
*/ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); - if (!zonefs_create_inode(dir, file_name, zone, type)) { - ret = -ENOMEM; + dent = zonefs_create_inode(dir, file_name, zone, type); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); goto free; } @@ -1905,18 +1933,18 @@ static int __init zonefs_init(void) if (ret) return ret; - ret = register_filesystem(&zonefs_type); + ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; - ret = zonefs_sysfs_init(); + ret = register_filesystem(&zonefs_type); if (ret) - goto unregister_fs; + goto sysfs_exit; return 0; -unregister_fs: - unregister_filesystem(&zonefs_type); +sysfs_exit: + zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); @@ -1925,9 +1953,9 @@ destroy_inodecache: static void __exit zonefs_exit(void) { + unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - unregister_filesystem(&zonefs_type); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index 9cb6755ce39a..9920689dc098 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -15,11 +15,6 @@ struct zonefs_sysfs_attr { ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf); }; -static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr) -{ - return container_of(attr, struct zonefs_sysfs_attr, attr); -} - #define ZONEFS_SYSFS_ATTR_RO(name) \ static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name) diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 4b3de66c3233..1dbe78119ff1 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) return ZONEFS_ZTYPE_SEQ; } -#define ZONEFS_ZONE_OPEN (1 << 0) -#define ZONEFS_ZONE_ACTIVE (1 << 1) +#define ZONEFS_ZONE_OPEN (1U << 0) +#define ZONEFS_ZONE_ACTIVE (1U << 1) +#define ZONEFS_ZONE_OFFLINE (1U << 2) +#define ZONEFS_ZONE_READONLY (1U << 3) /* * In-memory inode data. |