summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/namei.c15
-rw-r--r--fs/afs/addr_list.c25
-rw-r--r--fs/afs/callback.c84
-rw-r--r--fs/afs/cmservice.c67
-rw-r--r--fs/afs/dir.c54
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/flock.c6
-rw-r--r--fs/afs/fsclient.c31
-rw-r--r--fs/afs/inode.c19
-rw-r--r--fs/afs/internal.h25
-rw-r--r--fs/afs/proc.c134
-rw-r--r--fs/afs/rotate.c20
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/afs/security.c17
-rw-r--r--fs/afs/server.c30
-rw-r--r--fs/afs/server_list.c7
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/afs/vlclient.c19
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/aio.c7
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/befs/linuxvfs.c17
-rw-r--r--fs/binfmt_elf.c8
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/btrfs/ctree.c22
-rw-r--r--fs/btrfs/ctree.h27
-rw-r--r--fs/btrfs/delayed-inode.c20
-rw-r--r--fs/btrfs/delayed-ref.c19
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c80
-rw-r--r--fs/btrfs/extent_io.c25
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c52
-rw-r--r--fs/btrfs/print-tree.c25
-rw-r--r--fs/btrfs/print-tree.h2
-rw-r--r--fs/btrfs/props.c12
-rw-r--r--fs/btrfs/qgroup.c43
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/transaction.c1
-rw-r--r--fs/btrfs/transaction.h14
-rw-r--r--fs/btrfs/tree-log.c144
-rw-r--r--fs/btrfs/volumes.c9
-rw-r--r--fs/cachefiles/namei.c10
-rw-r--r--fs/cachefiles/proc.c19
-rw-r--r--fs/ceph/file.c205
-rw-r--r--fs/ceph/inode.c10
-rw-r--r--fs/ceph/xattr.c28
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/cifs_debug.c15
-rw-r--r--fs/cifs/cifs_debug.h2
-rw-r--r--fs/cifs/cifsfs.c13
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/connect.c40
-rw-r--r--fs/cifs/dir.c9
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/smb2ops.c28
-rw-r--r--fs/cifs/smb2pdu.c86
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/cifs/smbdirect.c42
-rw-r--r--fs/cifs/transport.c9
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/dcache.c144
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/crypto.c41
-rw-r--r--fs/ecryptfs/file.c21
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/exofs/ore.c10
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext2/inode.c10
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext4/balloc.c9
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c16
-rw-r--r--fs/ext4/mballoc.c29
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/super.c1
-rw-r--r--fs/ext4/sysfs.c49
-rw-r--r--fs/f2fs/namei.c12
-rw-r--r--fs/f2fs/sysfs.c29
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/fscache/histogram.c17
-rw-r--r--fs/fscache/internal.h5
-rw-r--r--fs/fscache/proc.c8
-rw-r--r--fs/fscache/stats.c17
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/inode.c1
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/compress.c19
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--fs/jffs2/dir.c12
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/jfs_debug.c43
-rw-r--r--fs/jfs/jfs_debug.h10
-rw-r--r--fs/jfs/jfs_logmgr.c14
-rw-r--r--fs/jfs/jfs_metapage.c14
-rw-r--r--fs/jfs/jfs_txnmgr.c28
-rw-r--r--fs/jfs/jfs_xtree.c14
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/kernfs/mount.c1
-rw-r--r--fs/locks.c16
-rw-r--r--fs/namei.c15
-rw-r--r--fs/namespace.c5
-rw-r--r--fs/nfs/client.c43
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/vfs.c22
-rw-r--r--fs/nilfs2/namei.c6
-rw-r--r--fs/notify/fanotify/fanotify.c34
-rw-r--r--fs/notify/fsnotify.c25
-rw-r--r--fs/ocfs2/cluster/heartbeat.c11
-rw-r--r--fs/ocfs2/refcounttree.c14
-rw-r--r--fs/open.c44
-rw-r--r--fs/orangefs/namei.c9
-rw-r--r--fs/orangefs/super.c5
-rw-r--r--fs/proc/array.c48
-rw-r--r--fs/proc/base.c32
-rw-r--r--fs/proc/cmdline.c14
-rw-r--r--fs/proc/consoles.c14
-rw-r--r--fs/proc/devices.c14
-rw-r--r--fs/proc/generic.c148
-rw-r--r--fs/proc/internal.h13
-rw-r--r--fs/proc/interrupts.c14
-rw-r--r--fs/proc/kcore.c23
-rw-r--r--fs/proc/loadavg.c16
-rw-r--r--fs/proc/meminfo.c14
-rw-r--r--fs/proc/nommu.c14
-rw-r--r--fs/proc/proc_net.c104
-rw-r--r--fs/proc/proc_tty.c22
-rw-r--r--fs/proc/self.c4
-rw-r--r--fs/proc/softirqs.c14
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/proc/thread_self.c4
-rw-r--r--fs/proc/uptime.c14
-rw-r--r--fs/proc/version.c14
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/reiserfs/procfs.c16
-rw-r--r--fs/seq_file.c5
-rw-r--r--fs/super.c39
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/udf/unicode.c6
-rw-r--r--fs/ufs/namei.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr.c9
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c21
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_aops.h2
-rw-r--r--fs/xfs/xfs_file.c24
-rw-r--r--fs/xfs/xfs_stats.c33
-rw-r--r--fs/xfs/xfs_super.c11
156 files changed, 1740 insertions, 1605 deletions
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d8aa0ae3d037..41c5749f4db7 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct super_block *sb = dir->i_sb;
struct buffer_head *bh;
struct inode *inode = NULL;
+ struct dentry *res;
pr_debug("%s(\"%pd\")\n", __func__, dentry);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
- affs_unlock_dir(dir);
- if (IS_ERR(bh))
+ if (IS_ERR(bh)) {
+ affs_unlock_dir(dir);
return ERR_CAST(bh);
+ }
if (bh) {
u32 ino = bh->b_blocknr;
@@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
affs_brelse(bh);
inode = affs_iget(sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
}
- d_add(dentry, inode);
- return NULL;
+ res = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(res))
+ res->d_fsdata = dentry->d_fsdata;
+ affs_unlock_dir(dir);
+ return res;
}
int
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 3bedfed608a2..7587fb665ff1 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
p = text;
do {
struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
- char tdelim = delim;
+ const char *q, *stop;
if (*p == delim) {
p++;
@@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
if (*p == '[') {
p++;
- tdelim = ']';
+ q = memchr(p, ']', end - p);
+ } else {
+ for (q = p; q < end; q++)
+ if (*q == '+' || *q == delim)
+ break;
}
- if (in4_pton(p, end - p,
+ if (in4_pton(p, q - p,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
- tdelim, &p)) {
+ -1, &stop)) {
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- } else if (in6_pton(p, end - p,
+ } else if (in6_pton(p, q - p,
srx->transport.sin6.sin6_addr.s6_addr,
- tdelim, &p)) {
+ -1, &stop)) {
/* Nothing to do */
} else {
goto bad_address;
}
- if (tdelim == ']') {
- if (p == end || *p != ']')
- goto bad_address;
+ if (stop != q)
+ goto bad_address;
+
+ p = q;
+ if (q < end && *q == ']')
p++;
- }
if (p < end) {
if (*p == '+') {
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index abd9a84f4e88..571437dcb252 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -23,36 +23,55 @@
/*
* Set up an interest-in-callbacks record for a volume on a server and
* register it with the server.
- * - Called with volume->server_sem held.
+ * - Called with vnode->io_lock held.
*/
int afs_register_server_cb_interest(struct afs_vnode *vnode,
- struct afs_server_entry *entry)
+ struct afs_server_list *slist,
+ unsigned int index)
{
- struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+ struct afs_server_entry *entry = &slist->servers[index];
+ struct afs_cb_interest *cbi, *vcbi, *new, *old;
struct afs_server *server = entry->server;
again:
+ if (vnode->cb_interest &&
+ likely(vnode->cb_interest == entry->cb_interest))
+ return 0;
+
+ read_lock(&slist->lock);
+ cbi = afs_get_cb_interest(entry->cb_interest);
+ read_unlock(&slist->lock);
+
vcbi = vnode->cb_interest;
if (vcbi) {
- if (vcbi == cbi)
+ if (vcbi == cbi) {
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
return 0;
+ }
+ /* Use a new interest in the server list for the same server
+ * rather than an old one that's still attached to a vnode.
+ */
if (cbi && vcbi->server == cbi->server) {
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
write_sequnlock(&vnode->cb_lock);
- afs_put_cb_interest(afs_v2net(vnode), cbi);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
+ /* Re-use the one attached to the vnode. */
if (!cbi && vcbi->server == server) {
- afs_get_cb_interest(vcbi);
- x = cmpxchg(&entry->cb_interest, cbi, vcbi);
- if (x != cbi) {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), vcbi);
+ write_lock(&slist->lock);
+ if (entry->cb_interest) {
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
goto again;
}
+
+ entry->cb_interest = cbi;
+ write_unlock(&slist->lock);
return 0;
}
}
@@ -72,13 +91,16 @@ again:
list_add_tail(&new->cb_link, &server->cb_interests);
write_unlock(&server->cb_break_lock);
- x = cmpxchg(&entry->cb_interest, cbi, new);
- if (x == cbi) {
+ write_lock(&slist->lock);
+ if (!entry->cb_interest) {
+ entry->cb_interest = afs_get_cb_interest(new);
cbi = new;
+ new = NULL;
} else {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), new);
+ cbi = afs_get_cb_interest(entry->cb_interest);
}
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), new);
}
ASSERT(cbi);
@@ -88,11 +110,14 @@ again:
*/
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
vnode->cb_s_break = cbi->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
@@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server,
if (cbi->vid != fid->vid)
continue;
- data.volume = NULL;
- data.fid = *fid;
- inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
- if (inode) {
- vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
- iput(inode);
+ if (fid->vnode == 0 && fid->unique == 0) {
+ /* The callback break applies to an entire volume. */
+ struct afs_super_info *as = AFS_FS_S(cbi->sb);
+ struct afs_volume *volume = as->volume;
+
+ write_lock(&volume->cb_break_lock);
+ volume->cb_v_break++;
+ write_unlock(&volume->cb_break_lock);
+ } else {
+ data.volume = NULL;
+ data.fid = *fid;
+ inode = ilookup5_nowait(cbi->sb, fid->vnode,
+ afs_iget5_test, &data);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_break_callback(vnode);
+ iput(inode);
+ }
}
}
@@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
ASSERT(server != NULL);
ASSERTCMP(count, <=, AFSCBMAX);
+ /* TODO: Sort the callback break list by volume ID */
+
for (; count > 0; callbacks++, count--) {
_debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }",
callbacks->fid.vid,
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 357de908df3a..c332c95a6940 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
- * clean up a cache manager call
+ * Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
{
- _enter("");
-
- /* Break the callbacks here so that we do it after the final ACK is
- * received. The step number here must match the final number in
- * afs_deliver_cb_callback().
- */
- if (call->unmarshall == 5) {
- ASSERT(call->cm_server && call->count && call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
- }
-
kfree(call->buffer);
call->buffer = NULL;
}
@@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
_enter("");
- /* be sure to send the reply *before* attempting to spam the AFS server
- * with FSFetchStatus requests on the vnodes with broken callbacks lest
- * the AFS server get into a vicious cycle of trying to break further
- * callbacks because it hadn't received completion of the CBCallBack op
- * yet */
- afs_send_empty_reply(call);
+ /* We need to break the callbacks before sending the reply as the
+ * server holds up change visibility till it receives our reply so as
+ * to maintain cache coherency.
+ */
+ if (call->cm_server)
+ afs_break_callbacks(call->cm_server, call->count, call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
+ afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
}
@@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
struct sockaddr_rxrpc srx;
- struct afs_server *server;
__be32 *bp;
int ret, loop;
@@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->offset = 0;
call->unmarshall++;
-
- /* Record that the message was unmarshalled successfully so
- * that the call destructor can know do the callback breaking
- * work, even if the final ACK isn't received.
- *
- * If the step number changes, then afs_cm_destructor() must be
- * updated also.
- */
- call->unmarshall++;
case 5:
break;
}
@@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
_enter("{%p}", call->cm_server);
- afs_init_callback_state(call->cm_server);
+ if (call->cm_server)
+ afs_init_callback_state(call->cm_server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
@@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
- struct afs_server *server;
int ret;
_enter("");
@@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
*/
static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
- struct sockaddr_rxrpc srx;
- struct afs_server *server;
struct afs_uuid *r;
unsigned loop;
__be32 *b;
@@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ rcu_read_lock();
+ call->cm_server = afs_find_server_by_uuid(call->net, call->request);
+ rcu_read_unlock();
+ if (!call->cm_server)
+ trace_afs_cm_no_server_u(call, call->request);
return afs_queue_call_work(call);
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5889f70d4d27..7d623008157f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
* get reclaimed during the iteration.
*/
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+ __acquires(&dvnode->validate_lock)
{
struct afs_read *req;
loff_t i_size;
@@ -261,18 +262,21 @@ retry:
/* If we're going to reload, we need to lock all the pages to prevent
* races.
*/
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- ret = -ERESTARTSYS;
- for (i = 0; i < req->nr_pages; i++)
- if (lock_page_killable(req->pages[i]) < 0)
- goto error_unlock;
+ ret = -ERESTARTSYS;
+ if (down_read_killable(&dvnode->validate_lock) < 0)
+ goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ goto success;
+
+ up_read(&dvnode->validate_lock);
+ if (down_write_killable(&dvnode->validate_lock) < 0)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
ret = afs_fetch_data(dvnode, key, req);
if (ret < 0)
- goto error_unlock_all;
+ goto error_unlock;
task_io_account_read(PAGE_SIZE * req->nr_pages);
@@ -284,33 +288,26 @@ retry:
for (i = 0; i < req->nr_pages; i++)
if (!afs_dir_check_page(dvnode, req->pages[i],
req->actual_len))
- goto error_unlock_all;
+ goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
}
+ downgrade_write(&dvnode->validate_lock);
success:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
return req;
-error_unlock_all:
- i = req->nr_pages;
error_unlock:
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
error:
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);
content_has_grown:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
afs_put_read(req);
goto retry;
}
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
}
out:
+ up_read(&dvnode->validate_lock);
afs_put_read(req);
_leave(" = %d", ret);
return ret;
@@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, true,
data_version);
}
@@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, false,
data_version);
}
@@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
- fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
}
@@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_symlink(&fc, dentry->d_name.name,
content, data_version,
&newfid, &newstatus);
@@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
- fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
orig_data_version, new_data_version);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index c24c08016dd9..7d4f26198573 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_data(&fc, desc);
}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7a0e017070ec..dc62d15a964b 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_set_lock(&fc, type);
}
@@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_extend_lock(&fc);
}
@@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_release_lock(&fc);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index efacdb7c1dee..b273e1d60478 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
struct afs_read *read_req)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+ bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
u8 flags = 0;
@@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
if (vnode)
write_seqlock(&vnode->cb_lock);
+ abort_code = ntohl(xdr->abort_code);
+
if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+ if (xdr->if_version == htonl(0) &&
+ abort_code != 0 &&
+ inline_error) {
+ /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+ * whereby it doesn't set the interface version in the error
+ * case.
+ */
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
goto bad;
}
+ if (abort_code != 0 && inline_error) {
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
type = ntohl(xdr->type);
- abort_code = ntohl(xdr->abort_code);
switch (type) {
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
@@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
}
status->type = type;
break;
- case AFS_FTYPE_INVALID:
- if (abort_code != 0) {
- status->abort_code = abort_code;
- ret = 0;
- goto out;
- }
- /* Fall through */
default:
goto bad;
}
@@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
write_seqlock(&vnode->cb_lock);
- if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+ if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
vnode->cb_version = ntohl(*bp++);
cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 06194cfe9724..479b7fdda124 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_file_status(&fc, NULL, new_inode);
}
@@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
read_seqlock_excl(&vnode->cb_lock);
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+ if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
+ vnode->cb_v_break != vnode->volume->cb_v_break) {
vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ valid = false;
} else if (vnode->status.type == AFS_FTYPE_DIR &&
test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
} else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
}
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
@@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (valid)
goto valid;
- mutex_lock(&vnode->validate_lock);
+ down_write(&vnode->validate_lock);
/* if the promise has expired, we need to check the server again to get
* a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* different */
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
valid:
_leave(" = 0");
return 0;
error_unlock:
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
@@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_setattr(&fc, attr);
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f8086ec95e24..e3f8a46663db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -396,6 +396,7 @@ struct afs_server {
#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */
#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
atomic_t usage;
u32 addr_version; /* Address list version */
@@ -433,6 +434,7 @@ struct afs_server_list {
unsigned short index; /* Server currently in use */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
+ rwlock_t lock;
struct afs_server_entry servers[];
};
@@ -459,6 +461,9 @@ struct afs_volume {
rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */
+ unsigned cb_v_break; /* Break-everything counter. */
+ rwlock_t cb_break_lock;
+
afs_voltype_t type; /* type of volume */
short error;
char type_force; /* force volume type (suppress R/O -> R/W) */
@@ -494,7 +499,7 @@ struct afs_vnode {
#endif
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
struct mutex io_lock; /* Lock for serialising I/O on this mutex */
- struct mutex validate_lock; /* lock for validating this vnode */
+ struct rw_semaphore validate_lock; /* lock for validating this vnode */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
@@ -519,6 +524,7 @@ struct afs_vnode {
/* outstanding callback notification on this file */
struct afs_cb_interest *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
+ unsigned int cb_v_break; /* Mass break counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
@@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *);
extern void afs_break_callback(struct afs_vnode *);
extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern int afs_register_server_cb_interest(struct afs_vnode *,
+ struct afs_server_list *, unsigned int);
extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
{
- refcount_inc(&cbi->usage);
+ if (cbi)
+ refcount_inc(&cbi->usage);
return cbi;
}
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+ return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+}
+
+static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
+ struct afs_cb_interest *cbi)
+{
+ return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+}
+
/*
* cell.c
*/
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 839a22280606..3aad32762989 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -62,7 +62,6 @@ static const struct file_operations afs_proc_rootcell_fops = {
.llseek = no_llseek,
};
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -76,15 +75,6 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
.show = afs_proc_cell_volumes_show,
};
-static const struct file_operations afs_proc_cell_volumes_fops = {
- .open = afs_proc_cell_volumes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_cell_vlservers_open(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -98,14 +88,6 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = {
.show = afs_proc_cell_vlservers_show,
};
-static const struct file_operations afs_proc_cell_vlservers_fops = {
- .open = afs_proc_cell_vlservers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_servers_open(struct inode *inode, struct file *file);
static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_servers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -119,13 +101,6 @@ static const struct seq_operations afs_proc_servers_ops = {
.show = afs_proc_servers_show,
};
-static const struct file_operations afs_proc_servers_fops = {
- .open = afs_proc_servers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int afs_proc_sysname_open(struct inode *inode, struct file *file);
static int afs_proc_sysname_release(struct inode *inode, struct file *file);
static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos);
@@ -152,7 +127,7 @@ static const struct file_operations afs_proc_sysname_fops = {
.write = afs_proc_sysname_write,
};
-static const struct file_operations afs_proc_stats_fops;
+static int afs_proc_stats_show(struct seq_file *m, void *v);
/*
* initialise the /proc/fs/afs/ directory
@@ -167,8 +142,8 @@ int afs_proc_init(struct afs_net *net)
if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
!proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
- !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) ||
- !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) ||
+ !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) ||
+ !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) ||
!proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops))
goto error_tree;
@@ -196,16 +171,7 @@ void afs_proc_cleanup(struct afs_net *net)
*/
static int afs_proc_cells_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &afs_proc_cells_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE_DATA(inode);
- return 0;
+ return seq_open(file, &afs_proc_cells_ops);
}
/*
@@ -430,10 +396,11 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
if (!dir)
goto error_dir;
- if (!proc_create_data("vlservers", 0, dir,
- &afs_proc_cell_vlservers_fops, cell) ||
- !proc_create_data("volumes", 0, dir,
- &afs_proc_cell_volumes_fops, cell))
+ if (!proc_create_seq_data("vlservers", 0, dir,
+ &afs_proc_cell_vlservers_ops, cell))
+ goto error_tree;
+ if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops,
+ cell))
goto error_tree;
_leave(" = 0");
@@ -459,36 +426,13 @@ void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
}
/*
- * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
- */
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_volumes_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
@@ -502,7 +446,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
return seq_list_next(v, &cell->proc_volumes, _pos);
@@ -514,7 +458,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
__releases(cell->proc_lock)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
read_unlock(&cell->proc_lock);
}
@@ -530,7 +474,7 @@ static const char afs_vol_types[3][3] = {
*/
static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
/* Display header on line 1 */
@@ -547,30 +491,6 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
- * location server
- */
-static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_vlservers_ops);
- if (ret<0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
@@ -578,7 +498,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
__acquires(rcu)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
@@ -603,7 +523,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *_pos)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
loff_t pos;
alist = rcu_dereference(cell->vl_addrs);
@@ -644,15 +564,6 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/servers" which provides a summary of active
- * servers
- */
-static int afs_proc_servers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &afs_proc_servers_ops);
-}
-
-/*
* Set up the iterator to start reading from the server list and return the
* first item.
*/
@@ -931,18 +842,3 @@ static int afs_proc_stats_show(struct seq_file *m, void *v)
atomic_long_read(&net->n_store_bytes));
return 0;
}
-
-/*
- * Open "/proc/fs/afs/stats" to allow reading of the stat counters.
- */
-static int afs_proc_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, afs_proc_stats_show, NULL);
-}
-
-static const struct file_operations afs_proc_stats_fops = {
- .open = afs_proc_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index ac0feac9d746..e065bc0768e6 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
write_lock(&vnode->volume->servers_lock);
@@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (vnode->volume->servers == fc->server_list) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
/* Try again */
@@ -350,8 +350,8 @@ use_server:
* break request before we've finished decoding the reply and
* installing the vnode.
*/
- fc->ac.error = afs_register_server_cb_interest(
- vnode, &fc->server_list->servers[fc->index]);
+ fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
+ fc->index);
if (fc->ac.error < 0)
goto failed;
@@ -369,8 +369,16 @@ use_server:
if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
fc->ac.alist = afs_get_addrlist(alist);
- if (!afs_probe_fileserver(fc))
- goto failed;
+ if (!afs_probe_fileserver(fc)) {
+ switch (fc->ac.error) {
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ case -EINTR:
+ goto failed;
+ default:
+ goto next_server;
+ }
+ }
}
if (!fc->ac.alist)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5c6263972ec9..08735948f15d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
+ unsigned int min_level;
int ret;
_enter("");
@@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family = AF_INET6;
srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
+ min_level = RXRPC_SECURITY_ENCRYPT;
+ ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+ (void *)&min_level, sizeof(min_level));
+ if (ret < 0)
+ goto error_2;
+
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
@@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call)
state = READ_ONCE(call->state);
switch (ret) {
case 0:
- if (state == AFS_CALL_CL_PROC_REPLY)
+ if (state == AFS_CALL_CL_PROC_REPLY) {
+ if (call->cbi)
+ set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+ &call->cbi->server->flags);
goto call_complete;
+ }
ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
goto done;
case -EINPROGRESS:
@@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
goto done;
- case -ENOTCONN:
- abort_code = RX_CALL_DEAD;
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KNC");
- goto local_abort;
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
diff --git a/fs/afs/security.c b/fs/afs/security.c
index cea2fff313dc..81dfedb7879f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
break;
}
- if (cb_break != (vnode->cb_break +
- vnode->cb_interest->server->cb_s_break)) {
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
changed = true;
break;
}
@@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
}
- if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
@@ -257,7 +256,7 @@ found:
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+ if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
@@ -373,18 +372,14 @@ int afs_permission(struct inode *inode, int mask)
mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
if (S_ISDIR(inode->i_mode)) {
- if (mask & MAY_EXEC) {
+ if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
- } else if (mask & MAY_READ) {
- if (!(access & AFS_ACE_LOOKUP))
- goto permission_denied;
- } else if (mask & MAY_WRITE) {
+ }
+ if (mask & MAY_WRITE) {
if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
goto permission_denied;
- } else {
- BUG();
}
} else {
if (!(access & AFS_ACE_LOOKUP))
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e23be63998a8..3af4625e2f8c 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
sizeof(struct in6_addr));
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == alist->nr_ipv4)
- // goto not_found;
- break;
- }
}
}
} else {
@@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
(u32 __force)b->sin6_addr.s6_addr32[3]);
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == 0)
- // goto not_found;
- break;
- }
}
}
}
- //not_found:
server = NULL;
found:
if (server && !atomic_inc_not_zero(&server->usage))
@@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
struct afs_addr_cursor ac = {
.alist = alist,
- .addr = &alist->addrs[0],
.start = alist->index,
- .index = alist->index,
+ .index = 0,
+ .addr = &alist->addrs[alist->index],
.error = 0,
};
_enter("%p", server);
- afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+ afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
@@ -428,8 +417,15 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
}
write_sequnlock(&net->fs_lock);
- if (deleted)
+ if (deleted) {
+ write_seqlock(&net->fs_addr_lock);
+ if (!hlist_unhashed(&server->addr4_link))
+ hlist_del_rcu(&server->addr4_link);
+ if (!hlist_unhashed(&server->addr6_link))
+ hlist_del_rcu(&server->addr6_link);
+ write_sequnlock(&net->fs_addr_lock);
afs_destroy_server(net, server);
+ }
}
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 0f8dc4c8f07c..8a5760aa5832 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error;
refcount_set(&slist->usage, 1);
+ rwlock_init(&slist->lock);
/* Make sure a records exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
@@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error_2;
}
- /* Insertion-sort by server pointer */
+ /* Insertion-sort by UUID */
for (j = 0; j < slist->nr_servers; j++)
- if (slist->servers[j].server >= server)
+ if (memcmp(&slist->servers[j].server->uuid,
+ &server->uuid,
+ sizeof(server->uuid)) >= 0)
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 65081ec3c36e..9e5d7966621c 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
mutex_init(&vnode->io_lock);
- mutex_init(&vnode->validate_lock);
+ init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
INIT_LIST_HEAD(&vnode->wb_keys);
@@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (afs_begin_vnode_operation(&fc, vnode, key)) {
fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_get_volume_status(&fc, &vs);
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 1ed7e2fd2f35..c3b740813fc7 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
struct afs_uvldbentry__xdr *uvldb;
struct afs_vldb_entry *entry;
bool new_only = false;
- u32 tmp, nr_servers;
+ u32 tmp, nr_servers, vlflags;
int i, ret;
_enter("");
@@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
new_only = true;
}
+ vlflags = ntohl(uvldb->flags);
for (i = 0; i < nr_servers; i++) {
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
@@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
if (tmp & AFS_VLSF_DONTUSE ||
(new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
continue;
- if (tmp & AFS_VLSF_RWVOL)
+ if (tmp & AFS_VLSF_RWVOL) {
entry->fs_mask[i] |= AFS_VOL_VTM_RW;
+ if (vlflags & AFS_VLF_BACKEXISTS)
+ entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+ }
if (tmp & AFS_VLSF_ROVOL)
entry->fs_mask[i] |= AFS_VOL_VTM_RO;
- if (tmp & AFS_VLSF_BACKVOL)
- entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
if (!entry->fs_mask[i])
continue;
@@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
for (i = 0; i < AFS_MAXTYPES; i++)
entry->vid[i] = ntohl(uvldb->volumeId[i]);
- tmp = ntohl(uvldb->flags);
- if (tmp & AFS_VLF_RWEXISTS)
+ if (vlflags & AFS_VLF_RWEXISTS)
__set_bit(AFS_VLDB_HAS_RW, &entry->flags);
- if (tmp & AFS_VLF_ROEXISTS)
+ if (vlflags & AFS_VLF_ROEXISTS)
__set_bit(AFS_VLDB_HAS_RO, &entry->flags);
- if (tmp & AFS_VLF_BACKEXISTS)
+ if (vlflags & AFS_VLF_BACKEXISTS)
__set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
- if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+ if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
entry->error = -ENOMEDIUM;
__set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c164698dc304..8b39e6ebb40b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -351,7 +351,7 @@ found_key:
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_store_data(&fc, mapping, first, last, offset, to);
}
diff --git a/fs/aio.c b/fs/aio.c
index 88d7927ffbc6..49f53516eef0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -634,9 +634,8 @@ static void free_ioctx_users(struct percpu_ref *ref)
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct aio_kiocb, ki_list);
-
- list_del_init(&req->ki_list);
kiocb_cancel(req);
+ list_del_init(&req->ki_list);
}
spin_unlock_irq(&ctx->ctx_lock);
@@ -1078,8 +1077,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
- percpu_ref_get(&ctx->users);
- ret = ctx;
+ if (percpu_ref_tryget_live(&ctx->users))
+ ret = ctx;
}
out:
rcu_read_unlock();
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 82e8f6edfb48..b12e37f27530 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -749,7 +749,7 @@ static int autofs4_dir_mkdir(struct inode *dir,
autofs4_del_active(dentry);
- inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555);
+ inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode);
if (!inode)
return -ENOMEM;
d_add(dentry, inode);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index af2832aaeec5..4700b4534439 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
if (ret == BEFS_BT_NOT_FOUND) {
befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
- d_add(dentry, NULL);
- return ERR_PTR(-ENOENT);
-
+ inode = NULL;
} else if (ret != BEFS_OK || offset == 0) {
befs_error(sb, "<--- %s Error", __func__);
- return ERR_PTR(-ENODATA);
+ inode = ERR_PTR(-ENODATA);
+ } else {
+ inode = befs_iget(dir->i_sb, (ino_t) offset);
}
-
- inode = befs_iget(dir->i_sb, (ino_t) offset);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- d_add(dentry, inode);
-
befs_debug(sb, "<--- %s", __func__);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 41e04183e4ce..4ad6f669fe34 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -377,10 +377,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
} else
map_addr = vm_mmap(filep, addr, size, prot, type, off);
- if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
- pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
- task_pid_nr(current), current->comm,
- (void *)addr);
+ if ((type & MAP_FIXED_NOREPLACE) &&
+ PTR_ERR((void *)map_addr) == -EEXIST)
+ pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
+ task_pid_nr(current), current->comm, (void *)addr);
return(map_addr);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e27065..bef6934b6189 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -272,7 +272,7 @@ struct blkdev_dio {
struct bio bio;
};
-static struct bio_set *blkdev_dio_pool __read_mostly;
+static struct bio_set blkdev_dio_pool;
static void blkdev_bio_end_io(struct bio *bio)
{
@@ -334,7 +334,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
@@ -432,10 +432,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
- if (!blkdev_dio_pool)
- return -ENOMEM;
- return 0;
+ return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
}
module_init(blkdev_init);
@@ -1322,27 +1319,30 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* check_disk_size_change - checks for disk size change and adjusts bdev size.
* @disk: struct gendisk to check
* @bdev: struct bdev to adjust.
+ * @verbose: if %true log a message about a size change if there is any
*
* This routine checks to see if the bdev size does not match the disk size
* and adjusts it if it differs. When shrinking the bdev size, its all caches
* are freed.
*/
-void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
+ bool verbose)
{
loff_t disk_size, bdev_size;
disk_size = (loff_t)get_capacity(disk) << 9;
bdev_size = i_size_read(bdev->bd_inode);
if (disk_size != bdev_size) {
- printk(KERN_INFO
- "%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, bdev_size, disk_size);
+ if (verbose) {
+ printk(KERN_INFO
+ "%s: detected capacity change from %lld to %lld\n",
+ disk->disk_name, bdev_size, disk_size);
+ }
i_size_write(bdev->bd_inode, disk_size);
if (bdev_size > disk_size)
flush_disk(bdev, false);
}
}
-EXPORT_SYMBOL(check_disk_size_change);
/**
* revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
@@ -1364,7 +1364,7 @@ int revalidate_disk(struct gendisk *disk)
return ret;
mutex_lock(&bdev->bd_mutex);
- check_disk_size_change(disk, bdev);
+ check_disk_size_change(disk, bdev, ret == 0);
bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fd44835b386..8c68961925b1 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
- btrfs_release_path(p);
-
ret = -EAGAIN;
- tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+ tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
@@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
} else {
ret = PTR_ERR(tmp);
}
+
+ btrfs_release_path(p);
return ret;
}
@@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
down_read(&fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
- left_path->nodes[left_level] = left_root->commit_root;
+ left_path->nodes[left_level] =
+ btrfs_clone_extent_buffer(left_root->commit_root);
+ if (!left_path->nodes[left_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(left_path->nodes[left_level]);
right_level = btrfs_header_level(right_root->commit_root);
right_root_level = right_level;
- right_path->nodes[right_level] = right_root->commit_root;
+ right_path->nodes[right_level] =
+ btrfs_clone_extent_buffer(right_root->commit_root);
+ if (!right_path->nodes[right_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(right_path->nodes[right_level]);
up_read(&fs_info->commit_root_sem);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5474ef14d6e6..0d422c9908b8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
unsigned short full;
unsigned short type;
unsigned short failfast;
+
+ /*
+ * Qgroup equivalent for @size @reserved
+ *
+ * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
+ * about things like csum size nor how many tree blocks it will need to
+ * reserve.
+ *
+ * Qgroup cares more about net change of the extent usage.
+ *
+ * So for one newly inserted file extent, in worst case it will cause
+ * leaf split and level increase, nodesize for each file extent is
+ * already too much.
+ *
+ * In short, qgroup_size/reserved is the upper limit of possible needed
+ * qgroup metadata reservation.
+ */
+ u64 qgroup_rsv_size;
+ u64 qgroup_rsv_reserved;
};
/*
@@ -714,6 +733,12 @@ struct btrfs_delayed_root;
*/
#define BTRFS_FS_EXCL_OP 16
+/*
+ * To info transaction_kthread we need an immediate commit so it doesn't
+ * need to wait for commit_interval
+ */
+#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -3157,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 06ec8ab6d9ba..a8d492dbd3e7 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+
+ /*
+ * Here we migrate space rsv from transaction rsv, since have already
+ * reserved space when starting a transaction. So no need to reserve
+ * qgroup space here.
+ */
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_item",
@@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
return;
rsv = &fs_info->delayed_block_rsv;
- btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
+ /*
+ * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
+ * to release/reserve qgroup space.
+ */
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
@@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
- if (ret < 0)
- return ret;
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
@@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
+ ret = btrfs_qgroup_reserve_meta_prealloc(root,
+ fs_info->nodesize, true);
+ if (ret < 0)
+ return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
@@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata(
"delayed_inode",
btrfs_ino(inode),
num_bytes, 1);
+ } else {
+ btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
}
return ret;
}
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 9e98295de7ce..e1b0651686f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data, int *qrecord_inserted_ret,
+ int action, int is_data, int is_system,
+ int *qrecord_inserted_ret,
int *old_ref_mod, int *new_ref_mod)
+
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref->ref_mod = count_mod;
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
+ head_ref->is_system = is_system;
head_ref->ref_tree = RB_ROOT;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
@@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
+ int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, 0, 0, action, 0,
- &qrecord_inserted, old_ref_mod,
- new_ref_mod);
+ is_system, &qrecord_inserted,
+ old_ref_mod, new_ref_mod);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
@@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, ref_root, reserved,
- action, 1, &qrecord_inserted,
+ action, 1, 0, &qrecord_inserted,
old_ref_mod, new_ref_mod);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
+ /*
+ * extent_ops just modify the flags of an extent and they don't result
+ * in ref count changes, hence it's safe to pass false/0 for is_system
+ * argument
+ */
add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data, NULL, NULL, NULL);
+ extent_op->is_data, 0, NULL, NULL, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 741869dbc316..7f00db50bd24 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
+ unsigned int is_system:1;
unsigned int processing:1;
};
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4ac8b1d21baf..c3504b4d281b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg)
now = get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
+ !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < fs_info->commit_interval)) {
spin_unlock(&fs_info->trans_lock);
@@ -3817,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
btrfs_free_qgroup_config(fs_info);
+ ASSERT(list_empty(&fs_info->delalloc_roots));
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -4124,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(fs_info);
+
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
-
- /* cleanup FS via transaction */
- btrfs_cleanup_transaction(fs_info);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4257,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
+ struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
-
- list_del_init(&btrfs_inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &btrfs_inode->runtime_flags);
+ __btrfs_del_delalloc_inode(root, btrfs_inode);
spin_unlock(&root->delalloc_lock);
- btrfs_invalidate_inodes(btrfs_inode->root);
-
+ /*
+ * Make sure we get a live inode and that it'll not disappear
+ * meanwhile.
+ */
+ inode = igrab(&btrfs_inode->vfs_inode);
+ if (inode) {
+ invalidate_inode_pages2(inode->i_mapping);
+ iput(inode);
+ }
spin_lock(&root->delalloc_lock);
}
-
spin_unlock(&root->delalloc_lock);
}
@@ -4285,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
- list_del_init(&root->delalloc_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
spin_unlock(&fs_info->delalloc_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 75cfb80d2551..51b5e2da708c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
trace_run_delayed_ref_head(fs_info, head, 0);
if (head->total_ref_mod < 0) {
- struct btrfs_block_group_cache *cache;
+ struct btrfs_space_info *space_info;
+ u64 flags;
- cache = btrfs_lookup_block_group(fs_info, head->bytenr);
- ASSERT(cache);
- percpu_counter_add(&cache->space_info->total_bytes_pinned,
+ if (head->is_data)
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ else if (head->is_system)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ space_info = __find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add(&space_info->total_bytes_pinned,
-head->num_bytes);
- btrfs_put_block_group(cache);
if (head->is_data) {
spin_lock(&delayed_refs->lock);
@@ -3136,7 +3142,11 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
struct rb_node *node;
int ret = 0;
+ spin_lock(&root->fs_info->trans_lock);
cur_trans = root->fs_info->running_transaction;
+ if (cur_trans)
+ refcount_inc(&cur_trans->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
if (!cur_trans)
return 0;
@@ -3145,6 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -3161,6 +3172,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
+ btrfs_put_transaction(cur_trans);
return -EAGAIN;
}
spin_unlock(&delayed_refs->lock);
@@ -3193,6 +3205,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
+ btrfs_put_transaction(cur_trans);
return ret;
}
@@ -5559,14 +5572,18 @@ again:
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
- struct btrfs_block_rsv *dest, u64 num_bytes)
+ struct btrfs_block_rsv *dest, u64 num_bytes,
+ u64 *qgroup_to_release_ret)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
+ u64 qgroup_to_release = 0;
u64 ret;
spin_lock(&block_rsv->lock);
- if (num_bytes == (u64)-1)
+ if (num_bytes == (u64)-1) {
num_bytes = block_rsv->size;
+ qgroup_to_release = block_rsv->qgroup_rsv_size;
+ }
block_rsv->size -= num_bytes;
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5575,6 +5592,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
+ if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+ block_rsv->qgroup_rsv_size;
+ block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+ } else {
+ qgroup_to_release = 0;
+ }
spin_unlock(&block_rsv->lock);
ret = num_bytes;
@@ -5597,6 +5621,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info_add_old_bytes(fs_info, space_info,
num_bytes);
}
+ if (qgroup_to_release_ret)
+ *qgroup_to_release_ret = qgroup_to_release;
return ret;
}
@@ -5738,17 +5764,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 num_bytes = 0;
+ u64 qgroup_num_bytes = 0;
int ret = -ENOSPC;
spin_lock(&block_rsv->lock);
if (block_rsv->reserved < block_rsv->size)
num_bytes = block_rsv->size - block_rsv->reserved;
+ if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+ qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+ block_rsv->qgroup_rsv_reserved;
spin_unlock(&block_rsv->lock);
if (num_bytes == 0)
return 0;
- ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
if (ret)
return ret;
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
@@ -5756,7 +5786,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
block_rsv_add_bytes(block_rsv, num_bytes, 0);
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 1);
- }
+
+ /* Don't forget to increase qgroup_rsv_reserved */
+ spin_lock(&block_rsv->lock);
+ block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+ spin_unlock(&block_rsv->lock);
+ } else
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
return ret;
}
@@ -5777,20 +5813,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 released = 0;
+ u64 qgroup_to_release = 0;
/*
* Since we statically set the block_rsv->size we just want to say we
* are releasing 0 bytes, and then we'll just get the reservation over
* the size free'd.
*/
- released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+ released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+ &qgroup_to_release);
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), released, 0);
if (qgroup_free)
- btrfs_qgroup_free_meta_prealloc(inode->root, released);
+ btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
else
- btrfs_qgroup_convert_reserved_meta(inode->root, released);
+ btrfs_qgroup_convert_reserved_meta(inode->root,
+ qgroup_to_release);
}
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5802,7 +5841,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
if (global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
- block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
+ block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
}
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5882,7 +5921,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
- (u64)-1);
+ (u64)-1, NULL);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5906,7 +5945,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
WARN_ON_ONCE(!list_empty(&trans->new_bgs));
block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
- trans->chunk_bytes_reserved);
+ trans->chunk_bytes_reserved, NULL);
trans->chunk_bytes_reserved = 0;
}
@@ -6011,6 +6050,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
{
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 reserve_size = 0;
+ u64 qgroup_rsv_size = 0;
u64 csum_leaves;
unsigned outstanding_extents;
@@ -6023,9 +6063,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
inode->csum_bytes);
reserve_size += btrfs_calc_trans_metadata_size(fs_info,
csum_leaves);
+ /*
+ * For qgroup rsv, the calculation is very simple:
+ * account one nodesize for each outstanding extent
+ *
+ * This is overestimating in most cases.
+ */
+ qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
spin_lock(&block_rsv->lock);
block_rsv->size = reserve_size;
+ block_rsv->qgroup_rsv_size = qgroup_rsv_size;
spin_unlock(&block_rsv->lock);
}
@@ -8403,7 +8451,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+ block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
}
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329002cf..56d32bb462f9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -26,7 +26,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
-static struct bio_set *btrfs_bioset;
+static struct bio_set btrfs_bioset;
static inline bool extent_state_in_tree(const struct extent_state *state)
{
@@ -162,20 +162,18 @@ int __init extent_io_init(void)
if (!extent_buffer_cache)
goto free_state_cache;
- btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio),
- BIOSET_NEED_BVECS);
- if (!btrfs_bioset)
+ if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS))
goto free_buffer_cache;
- if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
goto free_bioset;
return 0;
free_bioset:
- bioset_free(btrfs_bioset);
- btrfs_bioset = NULL;
+ bioset_exit(&btrfs_bioset);
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
@@ -198,8 +196,7 @@ void __cold extent_io_exit(void)
rcu_barrier();
kmem_cache_destroy(extent_state_cache);
kmem_cache_destroy(extent_buffer_cache);
- if (btrfs_bioset)
- bioset_free(btrfs_bioset);
+ bioset_exit(&btrfs_bioset);
}
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2679,7 +2676,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
{
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
@@ -2692,7 +2689,7 @@ struct bio *btrfs_bio_clone(struct bio *bio)
struct bio *new;
/* Bio allocation backed by a bioset does not fail */
- new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+ new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
btrfs_bio = btrfs_io_bio(new);
btrfs_io_bio_init(btrfs_bio);
btrfs_bio->iter = bio->bi_iter;
@@ -2704,7 +2701,7 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
struct bio *bio;
/* Bio allocation backed by a bioset does not fail */
- bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
@@ -2715,7 +2712,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
struct btrfs_io_bio *btrfs_bio;
/* this will never fail when it's backed by a bioset */
- bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+ bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
btrfs_bio = btrfs_io_bio(bio);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0167a9c97c9c..f660ba1e5e58 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1748,7 +1748,7 @@ again:
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
- (ret != 0));
+ true);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e064c49c9a9a..0b86cf10cf2a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -31,6 +31,7 @@
#include <linux/uio.h>
#include <linux/magic.h>
#include <linux/iversion.h>
+#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1741,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
spin_unlock(&root->delalloc_lock);
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- spin_lock(&root->delalloc_lock);
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
@@ -1759,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
spin_unlock(&fs_info->delalloc_root_lock);
}
}
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
+{
+ spin_lock(&root->delalloc_lock);
+ __btrfs_del_delalloc_inode(root, inode);
spin_unlock(&root->delalloc_lock);
}
@@ -5905,11 +5913,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
struct dir_entry *entry = addr;
char *name = (char *)(entry + 1);
- ctx->pos = entry->offset;
- if (!dir_emit(ctx, name, entry->name_len, entry->ino,
- entry->type))
+ ctx->pos = get_unaligned(&entry->offset);
+ if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
+ get_unaligned(&entry->ino),
+ get_unaligned(&entry->type)))
return 1;
- addr += sizeof(struct dir_entry) + entry->name_len;
+ addr += sizeof(struct dir_entry) +
+ get_unaligned(&entry->name_len);
ctx->pos++;
}
return 0;
@@ -5999,14 +6009,15 @@ again:
}
entry = addr;
- entry->name_len = name_len;
+ put_unaligned(name_len, &entry->name_len);
name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
- entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+ put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
+ &entry->type);
btrfs_dir_item_key_to_cpu(leaf, di, &location);
- entry->ino = location.objectid;
- entry->offset = found_key.offset;
+ put_unaligned(location.objectid, &entry->ino);
+ put_unaligned(found_key.offset, &entry->offset);
entries++;
addr += sizeof(struct dir_entry) + name_len;
total_len += sizeof(struct dir_entry) + name_len;
@@ -6575,8 +6586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
} else {
btrfs_update_inode(trans, root, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
}
out_unlock:
@@ -6652,8 +6662,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
@@ -6798,12 +6807,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- d_instantiate(dentry, inode);
- /*
- * mkdir is special. We're unlocking after we call d_instantiate
- * to avoid a race with nfsd calling d_instantiate.
- */
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
drop_on_err = 0;
out_fail:
@@ -9113,7 +9117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
BTRFS_EXTENT_DATA_KEY);
trans->block_rsv = &fs_info->trans_block_rsv;
if (ret != -ENOSPC && ret != -EAGAIN) {
- err = ret;
+ if (ret < 0)
+ err = ret;
break;
}
@@ -10246,8 +10251,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 124276bba8cf..21a831d3d087 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l)
fs_info = l->fs_info;
nr = btrfs_header_nritems(l);
- btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d",
- btrfs_header_bytenr(l), nr,
- btrfs_leaf_free_space(fs_info, l));
+ btrfs_info(fs_info,
+ "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
+ btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
+ btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
@@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
}
}
-void btrfs_print_tree(struct extent_buffer *c)
+void btrfs_print_tree(struct extent_buffer *c, bool follow)
{
struct btrfs_fs_info *fs_info;
int i; u32 nr;
@@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c)
return;
}
btrfs_info(fs_info,
- "node %llu level %d total ptrs %d free spc %u",
- btrfs_header_bytenr(c), level, nr,
- (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr);
+ "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu",
+ btrfs_header_bytenr(c), level, btrfs_header_generation(c),
+ nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
+ btrfs_header_owner(c));
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
- pr_info("\tkey %d (%llu %u %llu) block %llu\n",
+ pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
i, key.objectid, key.type, key.offset,
- btrfs_node_blockptr(c, i));
+ btrfs_node_blockptr(c, i),
+ btrfs_node_ptr_generation(c, i));
}
+ if (!follow)
+ return;
for (i = 0; i < nr; i++) {
struct btrfs_key first_key;
struct extent_buffer *next;
@@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c)
if (btrfs_header_level(next) !=
level - 1)
BUG();
- btrfs_print_tree(next);
+ btrfs_print_tree(next, follow);
free_extent_buffer(next);
}
}
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index 4a98481688f4..e6bb38fd75ad 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -7,6 +7,6 @@
#define BTRFS_PRINT_TREE_H
void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *c);
+void btrfs_print_tree(struct extent_buffer *c, bool follow);
#endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 53a8c95828e3..dc6140013ae8 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int type;
if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
return 0;
}
- if (!strncmp("lzo", value, 3))
+ if (!strncmp("lzo", value, 3)) {
type = BTRFS_COMPRESS_LZO;
- else if (!strncmp("zlib", value, 4))
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+ } else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
- else if (!strncmp("zstd", value, len))
+ } else if (!strncmp("zstd", value, len)) {
type = BTRFS_COMPRESS_ZSTD;
- else
+ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ } else {
return -EINVAL;
+ }
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 09c7e4fd550f..9fb758d5077a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
+#include <linux/sizes.h>
#include "ctree.h"
#include "transaction.h"
@@ -2375,8 +2376,21 @@ out:
return ret;
}
-static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+/*
+ * Two limits to commit transaction in advance.
+ *
+ * For RATIO, it will be 1/RATIO of the remaining limit
+ * (excluding data and prealloc meta) as threshold.
+ * For SIZE, it will be in byte unit as threshold.
+ */
+#define QGROUP_PERTRANS_RATIO 32
+#define QGROUP_PERTRANS_SIZE SZ_32M
+static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
+ const struct btrfs_qgroup *qg, u64 num_bytes)
{
+ u64 limit;
+ u64 threshold;
+
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
return false;
@@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
return false;
+ /*
+ * Even if we passed the check, it's better to check if reservation
+ * for meta_pertrans is pushing us near limit.
+ * If there is too much pertrans reservation or it's near the limit,
+ * let's try commit transaction to free some, using transaction_kthread
+ */
+ if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
+ BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
+ if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ limit = qg->max_excl;
+ else
+ limit = qg->max_rfer;
+ threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
+ qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
+ QGROUP_PERTRANS_RATIO;
+ threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
+
+ /*
+ * Use transaction_kthread to commit transaction, so we no
+ * longer need to bother nested transaction nor lock context.
+ */
+ if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
+ btrfs_commit_transaction_locksafe(fs_info);
+ }
+
return true;
}
@@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
qg = unode_aux_to_qgroup(unode);
- if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+ if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
ret = -EDQUOT;
goto out;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 00b7d3231821..b041b945a7ae 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1841,7 +1841,7 @@ again:
old_bytenr = btrfs_node_blockptr(parent, slot);
blocksize = fs_info->nodesize;
old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
- btrfs_node_key_to_cpu(parent, &key, slot);
+ btrfs_node_key_to_cpu(parent, &first_key, slot);
if (level <= max_level) {
eb = path->nodes[level];
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 221e5cdb060b..c0074d2d7d6d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5236,6 +5236,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
}
+ if (offset >= sctx->cur_inode_size) {
+ ret = 0;
+ goto out;
+ }
if (offset + len > sctx->cur_inode_size)
len = sctx->cur_inode_size - offset;
if (len == 0) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 63fdcab64b01..c944b4769e3c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
+ clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
spin_lock(&fs_info->trans_lock);
list_del_init(&cur_trans->list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c88fccd80bc5..d8c0826bc2c7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
int wait_for_unblock);
+
+/*
+ * Try to commit transaction asynchronously, so this is safe to call
+ * even holding a spinlock.
+ *
+ * It's done by informing transaction_kthread to commit transaction without
+ * waiting for commit interval.
+ */
+static inline void btrfs_commit_transaction_locksafe(
+ struct btrfs_fs_info *fs_info)
+{
+ set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
+ wake_up_process(fs_info->transaction_kthread);
+}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 43758e30aa7a..8f23a94dab77 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+ * are not in the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key;
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *dst_path = NULL;
+ u64 last_extent = (u64)-1;
+ int ins_nr = 0;
+ int start_slot;
+ int ret;
+
+ if (!(inode->flags & BTRFS_INODE_PREALLOC))
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = i_size;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ if (ret < 0)
+ goto out;
+ ins_nr = 0;
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(key.objectid < ino) ||
+ key.type < BTRFS_EXTENT_DATA_KEY ||
+ key.offset < i_size) {
+ path->slots[0]++;
+ continue;
+ }
+ if (last_extent == (u64)-1) {
+ last_extent = key.offset;
+ /*
+ * Avoid logging extent items logged in past fsync calls
+ * and leading to duplicate keys in the log tree.
+ */
+ do {
+ ret = btrfs_truncate_inode_items(trans,
+ root->log_root,
+ &inode->vfs_inode,
+ i_size,
+ BTRFS_EXTENT_DATA_KEY);
+ } while (ret == -EAGAIN);
+ if (ret)
+ goto out;
+ }
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
+ path->slots[0]++;
+ if (!dst_path) {
+ dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ start_slot, ins_nr, 1, 0);
+ if (ret > 0)
+ ret = 0;
+ }
+out:
+ btrfs_release_path(path);
+ btrfs_free_path(dst_path);
+ return ret;
+}
+
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
+ /* We log prealloc extents beyond eof later. */
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+ em->start >= i_size_read(&inode->vfs_inode))
+ continue;
+
if (em->start < logged_start)
logged_start = em->start;
if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
num++;
}
- /*
- * Add all prealloc extents beyond the inode's i_size to make sure we
- * don't lose them after doing a fast fsync and replaying the log.
- */
- if (inode->flags & BTRFS_INODE_PREALLOC) {
- struct rb_node *node;
-
- for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
- em = rb_entry(node, struct extent_map, rb_node);
- if (em->start < i_size_read(&inode->vfs_inode))
- break;
- if (!list_empty(&em->list))
- continue;
- /* Same as above loop. */
- if (++num > 32768) {
- list_del_init(&tree->modified_extents);
- ret = -EFBIG;
- goto process;
- }
- refcount_inc(&em->refs);
- set_bit(EXTENT_FLAG_LOGGING, &em->flags);
- list_add_tail(&em->list, &extents);
- }
- }
-
list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
@@ -4443,6 +4527,9 @@ process:
up_write(&inode->dio_sem);
btrfs_release_path(path);
+ if (!ret)
+ ret = btrfs_log_prealloc_extents(trans, inode, path);
+
return ret;
}
@@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
+ bool xattrs_logged = false;
path = btrfs_alloc_path();
if (!path)
@@ -5128,6 +5216,7 @@ next_key:
err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
if (err)
goto out_unlock;
+ xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
@@ -5140,6 +5229,11 @@ log_extents:
btrfs_release_path(dst_path);
if (need_log_inode_item) {
err = log_inode_item(trans, log, dst_path, inode);
+ if (!err && !xattrs_logged) {
+ err = btrfs_log_all_xattrs(trans, root, inode, path,
+ dst_path);
+ btrfs_release_path(path);
+ }
if (err)
goto out_unlock;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 292266f6ab9c..be3fc701f389 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
return 0;
}
+ /*
+ * A ro->rw remount sequence should continue with the paused balance
+ * regardless of who pauses it, system or the user as of now, so set
+ * the resume flag.
+ */
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+ spin_unlock(&fs_info->balance_lock);
+
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
return PTR_ERR_OR_ZERO(tsk);
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0daa1e3fe0df..ab0bbe93b398 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -572,6 +572,11 @@ lookup_again:
if (ret < 0)
goto create_error;
+ if (unlikely(d_unhashed(next))) {
+ dput(next);
+ inode_unlock(d_inode(dir));
+ goto lookup_again;
+ }
ASSERT(d_backing_inode(next));
_debug("mkdir -> %p{%p{ino=%lu}}",
@@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* search the current directory for the element name */
inode_lock(d_inode(dir));
+retry:
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
cachefiles_hist(cachefiles_lookup_histogram, start);
@@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
if (ret < 0)
goto mkdir_error;
+ if (unlikely(d_unhashed(subdir))) {
+ dput(subdir);
+ goto retry;
+ }
ASSERT(d_backing_inode(subdir));
_debug("mkdir -> %p{%p{ino=%lu}}",
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
index 125b90f6c796..0ce1aa56b67f 100644
--- a/fs/cachefiles/proc.c
+++ b/fs/cachefiles/proc.c
@@ -85,21 +85,6 @@ static const struct seq_operations cachefiles_histogram_ops = {
};
/*
- * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
- */
-static int cachefiles_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &cachefiles_histogram_ops);
-}
-
-static const struct file_operations cachefiles_histogram_fops = {
- .open = cachefiles_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
* initialise the /proc/fs/cachefiles/ directory
*/
int __init cachefiles_proc_init(void)
@@ -109,8 +94,8 @@ int __init cachefiles_proc_init(void)
if (!proc_mkdir("fs/cachefiles", NULL))
goto error_dir;
- if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
- &cachefiles_histogram_fops))
+ if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
+ &cachefiles_histogram_ops))
goto error_histogram;
_leave(" = 0");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f85040d73e3d..cf0e45b10121 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags)
*/
/*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
+ * How many pages to get in one call to iov_iter_get_pages(). This
+ * determines the size of the on-stack array used as a buffer.
*/
-static size_t dio_get_pagev_size(const struct iov_iter *it)
+#define ITER_GET_BVECS_PAGES 64
+
+static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec *bvecs)
{
- const struct iovec *iov = it->iov;
- const struct iovec *iovend = iov + it->nr_segs;
- size_t size;
-
- size = iov->iov_len - it->iov_offset;
- /*
- * An iov can be page vectored when both the current tail
- * and the next base are page aligned.
- */
- while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
- (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
- size += iov->iov_len;
- }
- dout("dio_get_pagevlen len = %zu\n", size);
- return size;
+ size_t size = 0;
+ int bvec_idx = 0;
+
+ if (maxsize > iov_iter_count(iter))
+ maxsize = iov_iter_count(iter);
+
+ while (size < maxsize) {
+ struct page *pages[ITER_GET_BVECS_PAGES];
+ ssize_t bytes;
+ size_t start;
+ int idx = 0;
+
+ bytes = iov_iter_get_pages(iter, pages, maxsize - size,
+ ITER_GET_BVECS_PAGES, &start);
+ if (bytes < 0)
+ return size ?: bytes;
+
+ iov_iter_advance(iter, bytes);
+ size += bytes;
+
+ for ( ; bytes; idx++, bvec_idx++) {
+ struct bio_vec bv = {
+ .bv_page = pages[idx],
+ .bv_len = min_t(int, bytes, PAGE_SIZE - start),
+ .bv_offset = start,
+ };
+
+ bvecs[bvec_idx] = bv;
+ bytes -= bv.bv_len;
+ start = 0;
+ }
+ }
+
+ return size;
}
/*
- * Allocate a page vector based on (@it, @nbytes).
- * The return value is the tuple describing a page vector,
- * that is (@pages, @page_align, @num_pages).
+ * iov_iter_get_pages() only considers one iov_iter segment, no matter
+ * what maxsize or maxpages are given. For ITER_BVEC that is a single
+ * page.
+ *
+ * Attempt to get up to @maxsize bytes worth of pages from @iter.
+ * Return the number of bytes in the created bio_vec array, or an error.
*/
-static struct page **
-dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
- size_t *page_align, int *num_pages)
+static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec **bvecs, int *num_bvecs)
{
- struct iov_iter tmp_it = *it;
- size_t align;
- struct page **pages;
- int ret = 0, idx, npages;
+ struct bio_vec *bv;
+ size_t orig_count = iov_iter_count(iter);
+ ssize_t bytes;
+ int npages;
- align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
- (PAGE_SIZE - 1);
- npages = calc_pages_for(align, nbytes);
- pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL);
- if (!pages)
- return ERR_PTR(-ENOMEM);
+ iov_iter_truncate(iter, maxsize);
+ npages = iov_iter_npages(iter, INT_MAX);
+ iov_iter_reexpand(iter, orig_count);
- for (idx = 0; idx < npages; ) {
- size_t start;
- ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
- npages - idx, &start);
- if (ret < 0)
- goto fail;
+ /*
+ * __iter_get_bvecs() may populate only part of the array -- zero it
+ * out.
+ */
+ bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO);
+ if (!bv)
+ return -ENOMEM;
- iov_iter_advance(&tmp_it, ret);
- nbytes -= ret;
- idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
+ bytes = __iter_get_bvecs(iter, maxsize, bv);
+ if (bytes < 0) {
+ /*
+ * No pages were pinned -- just free the array.
+ */
+ kvfree(bv);
+ return bytes;
}
- BUG_ON(nbytes != 0);
- *num_pages = npages;
- *page_align = align;
- dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align);
- return pages;
-fail:
- ceph_put_page_vector(pages, idx, false);
- return ERR_PTR(ret);
+ *bvecs = bv;
+ *num_bvecs = npages;
+ return bytes;
+}
+
+static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
+{
+ int i;
+
+ for (i = 0; i < num_bvecs; i++) {
+ if (bvecs[i].bv_page) {
+ if (should_dirty)
+ set_page_dirty_lock(bvecs[i].bv_page);
+ put_page(bvecs[i].bv_page);
+ }
+ }
+ kvfree(bvecs);
}
/*
@@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
- int num_pages = calc_pages_for((u64)osd_data->alignment,
- osd_data->length);
- dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
- inode, rc, osd_data->length);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
+ BUG_ON(!osd_data->num_bvecs);
+
+ dout("ceph_aio_complete_req %p rc %d bytes %u\n",
+ inode, rc, osd_data->bvec_pos.iter.bi_size);
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
@@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
} else if (!aio_req->write) {
if (rc == -ENOENT)
rc = 0;
- if (rc >= 0 && osd_data->length > rc) {
- int zoff = osd_data->alignment + rc;
- int zlen = osd_data->length - rc;
+ if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) {
+ struct iov_iter i;
+ int zlen = osd_data->bvec_pos.iter.bi_size - rc;
+
/*
* If read is satisfied by single OSD request,
* it can pass EOF. Otherwise read is within
@@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
aio_req->total_len = rc + zlen;
}
- if (zlen > 0)
- ceph_zero_page_vector_range(zoff, zlen,
- osd_data->pages);
+ iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+ osd_data->num_bvecs,
+ osd_data->bvec_pos.iter.bi_size);
+ iov_iter_advance(&i, rc);
+ iov_iter_zero(zlen, &i);
}
}
- ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
+ put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
+ aio_req->should_dirty);
ceph_osdc_put_request(req);
if (rc < 0)
@@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_vino vino;
struct ceph_osd_request *req;
- struct page **pages;
+ struct bio_vec *bvecs;
struct ceph_aio_request *aio_req = NULL;
int num_pages = 0;
int flags;
@@ -914,10 +954,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
}
while (iov_iter_count(iter) > 0) {
- u64 size = dio_get_pagev_size(iter);
- size_t start = 0;
+ u64 size = iov_iter_count(iter);
ssize_t len;
+ if (write)
+ size = min_t(u64, size, fsc->mount_options->wsize);
+ else
+ size = min_t(u64, size, fsc->mount_options->rsize);
+
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
vino, pos, &size, 0,
@@ -933,18 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
break;
}
- if (write)
- size = min_t(u64, size, fsc->mount_options->wsize);
- else
- size = min_t(u64, size, fsc->mount_options->rsize);
-
- len = size;
- pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
- if (IS_ERR(pages)) {
+ len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+ if (len < 0) {
ceph_osdc_put_request(req);
- ret = PTR_ERR(pages);
+ ret = len;
break;
}
+ if (len != size)
+ osd_req_op_extent_update(req, 0, len);
/*
* To simplify error handling, allow AIO when IO within i_size
@@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_mtime = mtime;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
- false, false);
+ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
if (aio_req) {
aio_req->total_len += len;
@@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
pos += len;
- iov_iter_advance(iter, len);
continue;
}
@@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret == -ENOENT)
ret = 0;
if (ret >= 0 && ret < len && pos + ret < size) {
+ struct iov_iter i;
int zlen = min_t(size_t, len - ret,
size - pos - ret);
- ceph_zero_page_vector_range(start + ret, zlen,
- pages);
+
+ iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
+ len);
+ iov_iter_advance(&i, ret);
+ iov_iter_zero(zlen, &i);
ret += zlen;
}
if (ret >= 0)
len = ret;
}
- ceph_put_page_vector(pages, num_pages, should_dirty);
-
+ put_bvecs(bvecs, num_pages, should_dirty);
ceph_osdc_put_request(req);
if (ret < 0)
break;
pos += len;
- iov_iter_advance(iter, len);
-
if (!write && pos >= size)
break;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8bf60250309e..ae056927080d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_FILE_BUFFER|
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
- if (timespec_compare(ctime, &inode->i_ctime) > 0) {
+ if (ci->i_version == 0 ||
+ timespec_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = *ctime;
}
- if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
+ if (ci->i_version == 0 ||
+ ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
dout("mtime %ld.%09ld -> %ld.%09ld "
"tw %d -> %d\n",
@@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
new_issued = ~issued & le32_to_cpu(info->cap.caps);
/* update inode */
- ci->i_version = le64_to_cpu(info->version);
inode->i_rdev = le32_to_cpu(info->rdev);
inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
@@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
xattr_blob = NULL;
}
+ /* finally update i_version */
+ ci->i_version = le64_to_cpu(info->version);
+
inode->i_mapping->a_ops = &ceph_aops;
switch (inode->i_mode & S_IFMT) {
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 7e72348639e4..315f7e63e7cc 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -228,7 +228,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
{
- return (ci->i_max_files || ci->i_max_bytes);
+ bool ret = false;
+ spin_lock(&ci->i_ceph_lock);
+ if ((ci->i_max_files || ci->i_max_bytes) &&
+ ci->i_vino.snap == CEPH_NOSNAP &&
+ ci->i_snap_realm &&
+ ci->i_snap_realm->ino == ci->i_vino.ino)
+ ret = true;
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
}
static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
@@ -1008,14 +1016,19 @@ int __ceph_setxattr(struct inode *inode, const char *name,
char *newval = NULL;
struct ceph_inode_xattr *xattr = NULL;
int required_blob_size;
+ bool check_realm = false;
bool lock_snap_rwsem = false;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
vxattr = ceph_match_vxattr(inode, name);
- if (vxattr && vxattr->readonly)
- return -EOPNOTSUPP;
+ if (vxattr) {
+ if (vxattr->readonly)
+ return -EOPNOTSUPP;
+ if (value && !strncmp(vxattr->name, "ceph.quota", 10))
+ check_realm = true;
+ }
/* pass any unhandled ceph.* xattrs through to the MDS */
if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
@@ -1109,6 +1122,15 @@ do_sync_unlocked:
err = -EBUSY;
} else {
err = ceph_sync_setxattr(inode, name, value, size, flags);
+ if (err >= 0 && check_realm) {
+ /* check if snaprealm was created for quota inode */
+ spin_lock(&ci->i_ceph_lock);
+ if ((ci->i_max_files || ci->i_max_bytes) &&
+ !(ci->i_snap_realm &&
+ ci->i_snap_realm->ino == ci->i_vino.ino))
+ err = -EOPNOTSUPP;
+ spin_unlock(&ci->i_ceph_lock);
+ }
}
out:
ceph_free_cap_flush(prealloc_cf);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 741749a98614..5f132d59dfc2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -197,7 +197,7 @@ config CIFS_SMB311
config CIFS_SMB_DIRECT
bool "SMB Direct support (Experimental)"
- depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
+ depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
help
Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
SMB Direct allows transferring SMB packets over RDMA. If unsure,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9d69ea433330..4bc4a7ac61d9 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -314,18 +314,6 @@ skip_rdma:
return 0;
}
-static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cifs_debug_data_proc_show, NULL);
-}
-
-static const struct file_operations cifs_debug_data_proc_fops = {
- .open = cifs_debug_data_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_CIFS_STATS
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
@@ -497,7 +485,8 @@ cifs_proc_init(void)
if (proc_fs_cifs == NULL)
return;
- proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
+ proc_create_single("DebugData", 0, proc_fs_cifs,
+ cifs_debug_data_proc_show);
#ifdef CONFIG_CIFS_STATS
proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index fe5567655662..0e74690d11bc 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -54,7 +54,7 @@ do { \
pr_debug_ ## ratefunc("%s: " \
fmt, __FILE__, ##__VA_ARGS__); \
} else if ((type) & VFS) { \
- pr_err_ ## ratefunc("CuIFS VFS: " \
+ pr_err_ ## ratefunc("CIFS VFS: " \
fmt, ##__VA_ARGS__); \
} else if ((type) & NOISY && (NOISY != 0)) { \
pr_debug_ ## ratefunc(fmt, ##__VA_ARGS__); \
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f715609b13f3..5a5a0158cc8f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1047,6 +1047,18 @@ out:
return rc;
}
+/*
+ * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync()
+ * is a dummy operation.
+ */
+static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n",
+ file, datasync);
+
+ return 0;
+}
+
static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff,
size_t len, unsigned int flags)
@@ -1181,6 +1193,7 @@ const struct file_operations cifs_dir_ops = {
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
+ .fsync = cifs_dir_fsync,
};
static void
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6d3e40d7029c..1529a088383d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -455,6 +455,9 @@ cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required)
server->sign = true;
}
+ if (cifs_rdma_enabled(server) && server->sign)
+ cifs_dbg(VFS, "Signing is enabled, and RDMA read/write will be disabled");
+
return 0;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e8830f076a7f..7a10a5d0731f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1977,14 +1977,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (vol->rdma && vol->sign) {
- cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
- " This is being fixed\n");
- goto cifs_parse_mount_err;
- }
-#endif
-
#ifndef CONFIG_KEYS
/* Muliuser mounts require CONFIG_KEYS support */
if (vol->multiuser) {
@@ -2959,6 +2951,22 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
}
}
+ if (volume_info->seal) {
+ if (ses->server->vals->protocol_id == 0) {
+ cifs_dbg(VFS,
+ "SMB3 or later required for encryption\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
+ } else if (tcon->ses->server->capabilities &
+ SMB2_GLOBAL_CAP_ENCRYPTION)
+ tcon->seal = true;
+ else {
+ cifs_dbg(VFS, "Encryption is not supported on share\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
+ }
+ }
+
/*
* BB Do we need to wrap session_mutex around this TCon call and Unix
* SetFS as we do on SessSetup and reconnect?
@@ -3007,22 +3015,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon->use_resilient = true;
}
- if (volume_info->seal) {
- if (ses->server->vals->protocol_id == 0) {
- cifs_dbg(VFS,
- "SMB3 or later required for encryption\n");
- rc = -EOPNOTSUPP;
- goto out_fail;
- } else if (tcon->ses->server->capabilities &
- SMB2_GLOBAL_CAP_ENCRYPTION)
- tcon->seal = true;
- else {
- cifs_dbg(VFS, "Encryption is not supported on share\n");
- rc = -EOPNOTSUPP;
- goto out_fail;
- }
- }
-
/*
* We can have only one retry value for a connection to a share so for
* resources mounted more than once to the same server share the last
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 81ba6e0d88d8..925844343038 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -684,6 +684,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
goto mknod_out;
}
+ if (!S_ISCHR(mode) && !S_ISBLK(mode))
+ goto mknod_out;
+
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
goto mknod_out;
@@ -692,10 +695,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
if (buf == NULL) {
- kfree(full_path);
rc = -ENOMEM;
- free_xid(xid);
- return rc;
+ goto mknod_out;
}
if (backup_cred(cifs_sb))
@@ -742,7 +743,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
pdev->minor = cpu_to_le64(MINOR(device_number));
rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
&bytes_written, iov, 1);
- } /* else if (S_ISFIFO) */
+ }
tcon->ses->server->ops->close(xid, tcon, &fid);
d_drop(direntry);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4bcd4e838b47..23fd430fe74a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3462,7 +3462,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* If the page is mmap'ed into a process' page tables, then we need to make
* sure that it doesn't change while being written back.
*/
-static int
+static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b4ae932ea134..9c6d95ffca97 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -252,9 +252,14 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
#ifdef CONFIG_CIFS_SMB_DIRECT
- if (server->rdma)
- wsize = min_t(unsigned int,
+ if (server->rdma) {
+ if (server->sign)
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_fragmented_send_size);
+ else
+ wsize = min_t(unsigned int,
wsize, server->smbd_conn->max_readwrite_size);
+ }
#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
@@ -272,9 +277,14 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
#ifdef CONFIG_CIFS_SMB_DIRECT
- if (server->rdma)
- rsize = min_t(unsigned int,
+ if (server->rdma) {
+ if (server->sign)
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_fragmented_recv_size);
+ else
+ rsize = min_t(unsigned int,
rsize, server->smbd_conn->max_readwrite_size);
+ }
#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
@@ -579,9 +589,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ /*
+ * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's
+ * not an error. Otherwise, the specified ea_name was not found.
+ */
if (!rc)
rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
SMB2_MAX_EA_BUF, ea_name);
+ else if (!ea_name && rc == -ENODATA)
+ rc = 0;
kfree(smb2_data);
return rc;
@@ -1452,7 +1468,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
struct kvec err_iov = {NULL, 0};
- struct smb2_err_rsp *err_buf = NULL;
+ struct smb2_err_rsp *err_buf;
struct smb2_symlink_err_rsp *symlink;
unsigned int sub_len;
unsigned int sub_offset;
@@ -1476,7 +1492,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov);
- if (!rc || !err_buf) {
+ if (!rc || !err_iov.iov_base) {
kfree(utf16_path);
return -ENOENT;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0f044c4a2dc9..0f48741a0130 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -383,10 +383,10 @@ static void
build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
{
pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
- pneg_ctxt->DataLength = cpu_to_le16(6);
- pneg_ctxt->CipherCount = cpu_to_le16(2);
- pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
- pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+ pneg_ctxt->DataLength = cpu_to_le16(4); /* Cipher Count + le16 cipher */
+ pneg_ctxt->CipherCount = cpu_to_le16(1);
+/* pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;*/ /* not supported yet */
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_CCM;
}
static void
@@ -444,6 +444,7 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
return -EINVAL;
}
server->cipher_type = ctxt->Ciphers[0];
+ server->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
return 0;
}
@@ -729,19 +730,14 @@ neg_exit:
int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
{
- int rc = 0;
- struct validate_negotiate_info_req vneg_inbuf;
+ int rc;
+ struct validate_negotiate_info_req *pneg_inbuf;
struct validate_negotiate_info_rsp *pneg_rsp = NULL;
u32 rsplen;
u32 inbuflen; /* max of 4 dialects */
cifs_dbg(FYI, "validate negotiate\n");
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (tcon->ses->server->rdma)
- return 0;
-#endif
-
/* In SMB3.11 preauth integrity supersedes validate negotiate */
if (tcon->ses->server->dialect == SMB311_PROT_ID)
return 0;
@@ -764,63 +760,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
- vneg_inbuf.Capabilities =
+ pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS);
+ if (!pneg_inbuf)
+ return -ENOMEM;
+
+ pneg_inbuf->Capabilities =
cpu_to_le32(tcon->ses->server->vals->req_capabilities);
- memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
+ memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid,
SMB2_CLIENT_GUID_SIZE);
if (tcon->ses->sign)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
else if (global_secflags & CIFSSEC_MAY_SIGN)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
else
- vneg_inbuf.SecurityMode = 0;
+ pneg_inbuf->SecurityMode = 0;
if (strcmp(tcon->ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(2);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(2);
/* structure is big enough for 3 dialects, sending only 2 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]);
} else if (strcmp(tcon->ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(3);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(3);
/* structure is big enough for 3 dialects */
- inbuflen = sizeof(struct validate_negotiate_info_req);
+ inbuflen = sizeof(*pneg_inbuf);
} else {
/* otherwise specific dialect was requested */
- vneg_inbuf.Dialects[0] =
+ pneg_inbuf->Dialects[0] =
cpu_to_le16(tcon->ses->server->vals->protocol_id);
- vneg_inbuf.DialectCount = cpu_to_le16(1);
+ pneg_inbuf->DialectCount = cpu_to_le16(1);
/* structure is big enough for 3 dialects, sending only 1 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]) * 2;
}
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
- (char **)&pneg_rsp, &rsplen);
+ (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen);
if (rc != 0) {
cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
- return -EIO;
+ rc = -EIO;
+ goto out_free_inbuf;
}
- if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+ rc = -EIO;
+ if (rsplen != sizeof(*pneg_rsp)) {
cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
rsplen);
/* relax check since Mac returns max bufsize allowed on ioctl */
- if ((rsplen > CIFSMaxBufSize)
- || (rsplen < sizeof(struct validate_negotiate_info_rsp)))
- goto err_rsp_free;
+ if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp))
+ goto out_free_rsp;
}
/* check validate negotiate info response matches what we got earlier */
@@ -837,15 +839,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
goto vneg_out;
/* validate negotiate successful */
+ rc = 0;
cifs_dbg(FYI, "validate negotiate info successful\n");
- kfree(pneg_rsp);
- return 0;
+ goto out_free_rsp;
vneg_out:
cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
-err_rsp_free:
+out_free_rsp:
kfree(pneg_rsp);
- return -EIO;
+out_free_inbuf:
+ kfree(pneg_inbuf);
+ return rc;
}
enum securityEnum
@@ -2590,7 +2594,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
* If we want to do a RDMA write, fill in and append
* smbd_buffer_descriptor_v1 to the end of read request
*/
- if (server->rdma && rdata &&
+ if (server->rdma && rdata && !server->sign &&
rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) {
struct smbd_buffer_descriptor_v1 *v1;
@@ -2968,7 +2972,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
* If we want to do a server RDMA read, fill in and append
* smbd_buffer_descriptor_v1 to the end of write request
*/
- if (server->rdma && wdata->bytes >=
+ if (server->rdma && !server->sign && wdata->bytes >=
server->smbd_conn->rdma_readwrite_threshold) {
struct smbd_buffer_descriptor_v1 *v1;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 6093e5142b2b..d28f358022c5 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -297,7 +297,7 @@ struct smb2_encryption_neg_context {
__le16 DataLength;
__le32 Reserved;
__le16 CipherCount; /* AES-128-GCM and AES-128-CCM */
- __le16 Ciphers[2]; /* Ciphers[0] since only one used now */
+ __le16 Ciphers[1]; /* Ciphers[0] since only one used now */
} __packed;
struct smb2_negotiate_rsp {
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5008af546dd1..c62f7c95683c 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1028,7 +1028,7 @@ static int smbd_post_send(struct smbd_connection *info,
for (i = 0; i < request->num_sge; i++) {
log_rdma_send(INFO,
"rdma_request sge[%d] addr=%llu length=%u\n",
- i, request->sge[0].addr, request->sge[0].length);
+ i, request->sge[i].addr, request->sge[i].length);
ib_dma_sync_single_for_device(
info->id->device,
request->sge[i].addr,
@@ -2086,7 +2086,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
int start, i, j;
int max_iov_size =
info->max_send_size - sizeof(struct smbd_data_transfer);
- struct kvec iov[SMBDIRECT_MAX_SGE];
+ struct kvec *iov;
int rc;
info->smbd_send_pending++;
@@ -2096,32 +2096,20 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
}
/*
- * This usually means a configuration error
- * We use RDMA read/write for packet size > rdma_readwrite_threshold
- * as long as it's properly configured we should never get into this
- * situation
- */
- if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
- log_write(ERR, "maximum send segment %x exceeding %x\n",
- rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
- rc = -EINVAL;
- goto done;
- }
-
- /*
- * Remove the RFC1002 length defined in MS-SMB2 section 2.1
- * It is used only for TCP transport
+ * Skip the RFC1002 length defined in MS-SMB2 section 2.1
+ * It is used only for TCP transport in the iov[0]
* In future we may want to add a transport layer under protocol
* layer so this will only be issued to TCP transport
*/
- iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
- iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
- buflen += iov[0].iov_len;
+
+ if (rqst->rq_iov[0].iov_len != 4) {
+ log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len);
+ return -EINVAL;
+ }
+ iov = &rqst->rq_iov[1];
/* total up iov array first */
- for (i = 1; i < rqst->rq_nvec; i++) {
- iov[i].iov_base = rqst->rq_iov[i].iov_base;
- iov[i].iov_len = rqst->rq_iov[i].iov_len;
+ for (i = 0; i < rqst->rq_nvec-1; i++) {
buflen += iov[i].iov_len;
}
@@ -2139,6 +2127,10 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
goto done;
}
+ cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
+ for (i = 0; i < rqst->rq_nvec-1; i++)
+ dump_smb(iov[i].iov_base, iov[i].iov_len);
+
remaining_data_length = buflen;
log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
@@ -2194,12 +2186,14 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
goto done;
}
i++;
+ if (i == rqst->rq_nvec-1)
+ break;
}
start = i;
buflen = 0;
} else {
i++;
- if (i == rqst->rq_nvec) {
+ if (i == rqst->rq_nvec-1) {
/* send out all remaining vecs */
remaining_data_length -= buflen;
log_write(INFO,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 8f6f25918229..927226a2122f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -753,7 +753,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
goto out;
#ifdef CONFIG_CIFS_SMB311
- if (ses->status == CifsNew)
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
smb311_update_preauth_hash(ses, rqst->rq_iov+1,
rqst->rq_nvec-1);
#endif
@@ -798,7 +798,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
*resp_buf_type = CIFS_SMALL_BUFFER;
#ifdef CONFIG_CIFS_SMB311
- if (ses->status == CifsNew) {
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
struct kvec iov = {
.iov_base = buf + 4,
.iov_len = get_rfc1002_length(buf)
@@ -834,8 +834,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1),
GFP_KERNEL);
- if (!new_iov)
+ if (!new_iov) {
+ /* otherwise cifs_send_recv below sets resp_buf_type */
+ *resp_buf_type = CIFS_NO_BUFFER;
return -ENOMEM;
+ }
} else
new_iov = s_iov;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 017b0ab19bc4..124b093d14e5 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
- if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) {
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
if (sbi && sbi->mtd_point_size)
mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
kill_mtd_super(sb);
diff --git a/fs/dcache.c b/fs/dcache.c
index e9476e94372a..0e8e5de3c48a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -580,6 +580,7 @@ static void __dentry_kill(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
if (likely(can_free))
dentry_free(dentry);
+ cond_resched();
}
static struct dentry *__lock_parent(struct dentry *dentry)
@@ -827,30 +828,24 @@ static inline bool fast_dput(struct dentry *dentry)
*/
void dput(struct dentry *dentry)
{
- if (unlikely(!dentry))
- return;
+ while (dentry) {
+ might_sleep();
-repeat:
- might_sleep();
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+ return;
+ }
- rcu_read_lock();
- if (likely(fast_dput(dentry))) {
+ /* Slow case: now with the dentry lock held */
rcu_read_unlock();
- return;
- }
-
- /* Slow case: now with the dentry lock held */
- rcu_read_unlock();
- if (likely(retain_dentry(dentry))) {
- spin_unlock(&dentry->d_lock);
- return;
- }
+ if (likely(retain_dentry(dentry))) {
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
- dentry = dentry_kill(dentry);
- if (dentry) {
- cond_resched();
- goto repeat;
+ dentry = dentry_kill(dentry);
}
}
EXPORT_SYMBOL(dput);
@@ -1066,8 +1061,6 @@ static void shrink_dentry_list(struct list_head *list)
while (!list_empty(list)) {
struct dentry *dentry, *parent;
- cond_resched();
-
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
rcu_read_lock();
@@ -1244,13 +1237,11 @@ enum d_walk_ret {
* @parent: start of walk
* @data: data passed to @enter() and @finish()
* @enter: callback when first entering the dentry
- * @finish: callback when successfully finished the walk
*
- * The @enter() and @finish() callbacks are called with d_lock held.
+ * The @enter() callbacks are called with d_lock held.
*/
static void d_walk(struct dentry *parent, void *data,
- enum d_walk_ret (*enter)(void *, struct dentry *),
- void (*finish)(void *))
+ enum d_walk_ret (*enter)(void *, struct dentry *))
{
struct dentry *this_parent;
struct list_head *next;
@@ -1339,8 +1330,6 @@ ascend:
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
rcu_read_unlock();
- if (finish)
- finish(data);
out_unlock:
spin_unlock(&this_parent->d_lock);
@@ -1389,7 +1378,7 @@ int path_has_submounts(const struct path *parent)
struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
read_seqlock_excl(&mount_lock);
- d_walk(parent->dentry, &data, path_check_mount, NULL);
+ d_walk(parent->dentry, &data, path_check_mount);
read_sequnlock_excl(&mount_lock);
return data.mounted;
@@ -1497,11 +1486,16 @@ void shrink_dcache_parent(struct dentry *parent)
data.start = parent;
data.found = 0;
- d_walk(parent, &data, select_collect, NULL);
+ d_walk(parent, &data, select_collect);
+
+ if (!list_empty(&data.dispose)) {
+ shrink_dentry_list(&data.dispose);
+ continue;
+ }
+
+ cond_resched();
if (!data.found)
break;
-
- shrink_dentry_list(&data.dispose);
}
}
EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1532,7 +1526,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
static void do_one_tree(struct dentry *dentry)
{
shrink_dcache_parent(dentry);
- d_walk(dentry, dentry, umount_check, NULL);
+ d_walk(dentry, dentry, umount_check);
d_drop(dentry);
dput(dentry);
}
@@ -1556,78 +1550,48 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
}
-struct detach_data {
- struct select_data select;
- struct dentry *mountpoint;
-};
-static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
+static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
{
- struct detach_data *data = _data;
-
+ struct dentry **victim = _data;
if (d_mountpoint(dentry)) {
__dget_dlock(dentry);
- data->mountpoint = dentry;
+ *victim = dentry;
return D_WALK_QUIT;
}
-
- return select_collect(&data->select, dentry);
-}
-
-static void check_and_drop(void *_data)
-{
- struct detach_data *data = _data;
-
- if (!data->mountpoint && list_empty(&data->select.dispose))
- __d_drop(data->select.start);
+ return D_WALK_CONTINUE;
}
/**
* d_invalidate - detach submounts, prune dcache, and drop
* @dentry: dentry to invalidate (aka detach, prune and drop)
- *
- * no dcache lock.
- *
- * The final d_drop is done as an atomic operation relative to
- * rename_lock ensuring there are no races with d_set_mounted. This
- * ensures there are no unhashed dentries on the path to a mountpoint.
*/
void d_invalidate(struct dentry *dentry)
{
- /*
- * If it's already been dropped, return OK.
- */
+ bool had_submounts = false;
spin_lock(&dentry->d_lock);
if (d_unhashed(dentry)) {
spin_unlock(&dentry->d_lock);
return;
}
+ __d_drop(dentry);
spin_unlock(&dentry->d_lock);
/* Negative dentries can be dropped without further checks */
- if (!dentry->d_inode) {
- d_drop(dentry);
+ if (!dentry->d_inode)
return;
- }
+ shrink_dcache_parent(dentry);
for (;;) {
- struct detach_data data;
-
- data.mountpoint = NULL;
- INIT_LIST_HEAD(&data.select.dispose);
- data.select.start = dentry;
- data.select.found = 0;
-
- d_walk(dentry, &data, detach_and_collect, check_and_drop);
-
- if (!list_empty(&data.select.dispose))
- shrink_dentry_list(&data.select.dispose);
- else if (!data.mountpoint)
+ struct dentry *victim = NULL;
+ d_walk(dentry, &victim, find_submount);
+ if (!victim) {
+ if (had_submounts)
+ shrink_dcache_parent(dentry);
return;
-
- if (data.mountpoint) {
- detach_mounts(data.mountpoint);
- dput(data.mountpoint);
}
+ had_submounts = true;
+ detach_mounts(victim);
+ dput(victim);
}
}
EXPORT_SYMBOL(d_invalidate);
@@ -1913,6 +1877,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
}
EXPORT_SYMBOL(d_instantiate);
+/*
+ * This should be equivalent to d_instantiate() + unlock_new_inode(),
+ * with lockdep-related part of unlock_new_inode() done before
+ * anything else. Use that instead of open-coding d_instantiate()/
+ * unlock_new_inode() combinations.
+ */
+void d_instantiate_new(struct dentry *entry, struct inode *inode)
+{
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+ BUG_ON(!inode);
+ lockdep_annotate_inode_mutex_key(inode);
+ security_d_instantiate(entry, inode);
+ spin_lock(&inode->i_lock);
+ __d_instantiate(entry, inode);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_instantiate_new);
+
/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
@@ -3097,7 +3083,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
void d_genocide(struct dentry *parent)
{
- d_walk(parent, parent, d_genocide_kill, NULL);
+ d_walk(parent, parent, d_genocide_kill);
}
EXPORT_SYMBOL(d_genocide);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 874607bb6e02..093fb54cd316 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -432,8 +432,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct bio *bio;
/*
- * bio_alloc() is guaranteed to return a bio when called with
- * __GFP_RECLAIM and we request a valid number of vectors.
+ * bio_alloc() is guaranteed to return a bio when allowed to sleep and
+ * we request a valid number of vectors.
*/
bio = bio_alloc(GFP_KERNEL, nr_vecs);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 846ca150d52e..4dd842f72846 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1997,6 +1997,16 @@ out:
return rc;
}
+static bool is_dot_dotdot(const char *name, size_t name_size)
+{
+ if (name_size == 1 && name[0] == '.')
+ return true;
+ else if (name_size == 2 && name[0] == '.' && name[1] == '.')
+ return true;
+
+ return false;
+}
+
/**
* ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
* @plaintext_name: The plaintext name
@@ -2021,13 +2031,21 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
size_t packet_size;
int rc = 0;
- if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
- && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
- && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
- ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
- const char *orig_name = name;
- size_t orig_name_size = name_size;
+ if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) &&
+ !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)) {
+ if (is_dot_dotdot(name, name_size)) {
+ rc = ecryptfs_copy_filename(plaintext_name,
+ plaintext_name_size,
+ name, name_size);
+ goto out;
+ }
+
+ if (name_size <= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE ||
+ strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
+ ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)) {
+ rc = -EINVAL;
+ goto out;
+ }
name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
@@ -2047,12 +2065,9 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
decoded_name,
decoded_name_size);
if (rc) {
- printk(KERN_INFO "%s: Could not parse tag 70 packet "
- "from filename; copying through filename "
- "as-is\n", __func__);
- rc = ecryptfs_copy_filename(plaintext_name,
- plaintext_name_size,
- orig_name, orig_name_size);
+ ecryptfs_printk(KERN_DEBUG,
+ "%s: Could not parse tag 70 packet from filename\n",
+ __func__);
goto out_free;
}
} else {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c74ed3ca3372..b76a9853325e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -82,17 +82,28 @@ ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
buf->sb, lower_name,
lower_namelen);
if (rc) {
- printk(KERN_ERR "%s: Error attempting to decode and decrypt "
- "filename [%s]; rc = [%d]\n", __func__, lower_name,
- rc);
- goto out;
+ if (rc != -EINVAL) {
+ ecryptfs_printk(KERN_DEBUG,
+ "%s: Error attempting to decode and decrypt filename [%s]; rc = [%d]\n",
+ __func__, lower_name, rc);
+ return rc;
+ }
+
+ /* Mask -EINVAL errors as these are most likely due a plaintext
+ * filename present in the lower filesystem despite filename
+ * encryption being enabled. One unavoidable example would be
+ * the "lost+found" dentry in the root directory of an Ext4
+ * filesystem.
+ */
+ return 0;
}
+
buf->caller->pos = buf->ctx.pos;
rc = !dir_emit(buf->caller, name, name_size, ino, d_type);
kfree(name);
if (!rc)
buf->entries_written++;
-out:
+
return rc;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 847904aa63a9..49121e5a8de2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
iget_failed(ecryptfs_inode);
goto out;
}
- unlock_new_inode(ecryptfs_inode);
- d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+ d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
out:
return rc;
}
@@ -395,8 +394,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
mount_crypt_stat = &ecryptfs_superblock_to_private(
ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if (mount_crypt_stat
- && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
+ if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
rc = ecryptfs_encrypt_and_encode_filename(
&encrypted_and_encoded_name, &len,
mount_crypt_stat, name, len);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c89a58cfc991..e74fe84d0886 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1880,7 +1880,7 @@ find_next_matching_auth_tok:
candidate_auth_tok = &auth_tok_list_item->auth_tok;
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG,
- "Considering cadidate auth tok:\n");
+ "Considering candidate auth tok:\n");
ecryptfs_dump_auth_tok(candidate_auth_tok);
}
rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig,
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 3c6a9c156b7a..ddbf87246898 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -790,7 +790,7 @@ int ore_create(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -815,7 +815,7 @@ int ore_remove(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -847,7 +847,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -966,7 +966,7 @@ int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
return 0; /* Just an empty slot */
first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
- or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, first_dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
@@ -1060,7 +1060,7 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, cur_comp));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 179cd5c2f52a..719a3152da80 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -229,7 +229,7 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length)
{
- struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+ struct osd_request *or = osd_start_request(od);
/* struct osd_sense_info osi = {.key = 0};*/
int ret;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 09640220fda8..047c327a6b23 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -88,11 +88,11 @@ out_unlock:
* The default page_lock and i_size verification done by non-DAX fault paths
* is sufficient because ext2 doesn't support hole punching.
*/
-static int ext2_dax_fault(struct vm_fault *vmf)
+static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
- int ret;
+ vm_fault_t ret;
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1e01fabef130..71635909df3b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1264,21 +1264,11 @@ do_indirects:
static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
- /*
- * XXX: it seems like a bug here that we don't allow
- * IS_APPEND inode to have blocks-past-i_size trimmed off.
- * review and fix this.
- *
- * Also would be nice to be able to handle IO errors and such,
- * but that's probably too much to ask.
- */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
if (ext2_inode_is_fast_symlink(inode))
return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
dax_sem_down_write(EXT2_I(inode));
__ext2_truncate_blocks(inode, offset);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 55f7caadb093..152453a91877 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ext2_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out:
return err;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a33d8fb1bf2a..508b905d744d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -321,6 +321,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_grpblk_t offset;
ext4_grpblk_t next_zero_bit;
+ ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_fsblk_t blk;
ext4_fsblk_t group_first_block;
@@ -338,7 +339,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
/* check whether block bitmap block number is set */
blk = ext4_block_bitmap(sb, desc);
offset = blk - group_first_block;
- if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+ if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
/* bad block bitmap */
return blk;
@@ -346,7 +347,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
/* check whether the inode bitmap block number is set */
blk = ext4_inode_bitmap(sb, desc);
offset = blk - group_first_block;
- if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+ if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
/* bad block bitmap */
return blk;
@@ -354,8 +355,8 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
/* check whether the inode table block number is set */
blk = ext4_inode_table(sb, desc);
offset = blk - group_first_block;
- if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
- EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize)
+ if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
+ EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit)
return blk;
next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a42e71203e53..229ea4da6785 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2390,7 +2390,7 @@ extern int ext4_init_inode_table(struct super_block *sb,
extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
-extern const struct file_operations ext4_seq_mb_groups_fops;
+extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_mb_init(struct super_block *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0a7315961bac..c969275ce3ee 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5329,8 +5329,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
stop = le32_to_cpu(extent->ee_block);
/*
- * In case of left shift, Don't start shifting extents until we make
- * sure the hole is big enough to accommodate the shift.
+ * For left shifts, make sure the hole on the left is big enough to
+ * accommodate the shift. For right shifts, make sure the last extent
+ * won't be shifted beyond EXT_MAX_BLOCKS.
*/
if (SHIFT == SHIFT_LEFT) {
path = ext4_find_extent(inode, start - 1, &path,
@@ -5350,9 +5351,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if ((start == ex_start && shift > ex_start) ||
(shift > start - ex_end)) {
- ext4_ext_drop_refs(path);
- kfree(path);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
+ }
+ } else {
+ if (shift > EXT_MAX_BLOCKS -
+ (stop + ext4_ext_get_actual_len(extent))) {
+ ret = -EINVAL;
+ goto out;
}
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 769a62708b1c..6884e81c1465 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2254,7 +2254,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2265,7 +2265,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2277,7 +2277,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2330,34 +2330,13 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
-static const struct seq_operations ext4_mb_seq_groups_ops = {
+const struct seq_operations ext4_mb_seq_groups_ops = {
.start = ext4_mb_seq_groups_start,
.next = ext4_mb_seq_groups_next,
.stop = ext4_mb_seq_groups_stop,
.show = ext4_mb_seq_groups_show,
};
-static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
-{
- struct super_block *sb = PDE_DATA(inode);
- int rc;
-
- rc = seq_open(file, &ext4_mb_seq_groups_ops);
- if (rc == 0) {
- struct seq_file *m = file->private_data;
- m->private = sb;
- }
- return rc;
-
-}
-
-const struct file_operations ext4_seq_mb_groups_fops = {
- .open = ext4_mb_seq_groups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b1f21e3a0763..4a09063ce1d2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2651,8 +2650,7 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 185f7e61f4cf..eb104e8476f0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5886,5 +5886,6 @@ static void __exit ext4_exit_fs(void)
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c957c2..f34da0bb8f17 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -346,39 +346,9 @@ static struct kobject *ext4_root;
static struct kobject *ext4_feat;
-#define PROC_FILE_SHOW_DEFN(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
-} \
-\
-static const struct file_operations ext4_seq_##name##_fops = { \
- .open = name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-}
-
-#define PROC_FILE_LIST(name) \
- { __stringify(name), &ext4_seq_##name##_fops }
-
-PROC_FILE_SHOW_DEFN(es_shrinker_info);
-PROC_FILE_SHOW_DEFN(options);
-
-static const struct ext4_proc_files {
- const char *name;
- const struct file_operations *fops;
-} proc_files[] = {
- PROC_FILE_LIST(options),
- PROC_FILE_LIST(es_shrinker_info),
- PROC_FILE_LIST(mb_groups),
- { NULL, NULL },
-};
-
int ext4_register_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
int err;
init_completion(&sbi->s_kobj_unregister);
@@ -392,11 +362,14 @@ int ext4_register_sysfs(struct super_block *sb)
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- proc_create_data(p->name, S_IRUGO, sbi->s_proc,
- p->fops, sb);
+ proc_create_single_data("options", S_IRUGO, sbi->s_proc,
+ ext4_seq_options_show, sb);
+ proc_create_single_data("es_shrinker_info", S_IRUGO,
+ sbi->s_proc, ext4_seq_es_shrinker_info_show,
+ sb);
+ proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_groups_ops, sb);
}
return 0;
}
@@ -404,13 +377,9 @@ int ext4_register_sysfs(struct super_block *sb)
void ext4_unregister_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
- if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- remove_proc_entry(p->name, sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
+ if (sbi->s_proc)
+ remove_proc_subtree(sb->s_id, ext4_proc_root);
kobject_del(&sbi->s_kobj);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d5098efe577c..75e37fd720b2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
/*
* Let's flush symlink data in order to avoid broken symlink as much as
@@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f33a56d6e6dd..4b47ca6296a7 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -572,23 +572,6 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-#define F2FS_PROC_FILE_DEF(_name) \
-static int _name##_open_fs(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
-} \
- \
-static const struct file_operations f2fs_seq_##_name##_fops = { \
- .open = _name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-F2FS_PROC_FILE_DEF(iostat_info);
-
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -632,12 +615,12 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
if (sbi->s_proc) {
- proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_info_fops, sb);
- proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_bits_fops, sb);
- proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_iostat_info_fops, sb);
+ proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+ segment_info_seq_show, sb);
+ proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+ segment_bits_seq_show, sb);
+ proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+ iostat_info_seq_show, sb);
}
return 0;
}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index f2728a4a03a1..b03f57b1105b 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -238,21 +238,9 @@ static int filesystems_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int filesystems_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, filesystems_proc_show, NULL);
-}
-
-static const struct file_operations filesystems_proc_fops = {
- .open = filesystems_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_filesystems_init(void)
{
- proc_create("filesystems", 0, NULL, &filesystems_proc_fops);
+ proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
return 0;
}
module_init(proc_filesystems_init);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4b12ba70a895..471d863958bc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits)
*/
if (inode && inode_to_wb_is_valid(inode)) {
struct bdi_writeback *wb;
- bool locked, congested;
+ struct wb_lock_cookie lock_cookie = {};
+ bool congested;
- wb = unlocked_inode_to_wb_begin(inode, &locked);
+ wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
congested = wb_congested(wb, cong_bits);
- unlocked_inode_to_wb_end(inode, locked);
+ unlocked_inode_to_wb_end(inode, &lock_cookie);
return congested;
}
@@ -1960,7 +1961,7 @@ void wb_workfn(struct work_struct *work)
}
if (!list_empty(&wb->work_list))
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ wb_wakeup(wb);
else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
wb_wakeup_delayed(wb);
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
index 15a3d042247e..9a13e9e15b69 100644
--- a/fs/fscache/histogram.c
+++ b/fs/fscache/histogram.c
@@ -83,24 +83,9 @@ static void fscache_histogram_stop(struct seq_file *m, void *v)
{
}
-static const struct seq_operations fscache_histogram_ops = {
+const struct seq_operations fscache_histogram_ops = {
.start = fscache_histogram_start,
.stop = fscache_histogram_stop,
.next = fscache_histogram_next,
.show = fscache_histogram_show,
};
-
-/*
- * open "/proc/fs/fscache/histogram" to provide latency data
- */
-static int fscache_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &fscache_histogram_ops);
-}
-
-const struct file_operations fscache_histogram_fops = {
- .open = fscache_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 500650f938fe..f83328a7f048 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -31,6 +31,7 @@
#include <linux/fscache-cache.h>
#include <trace/events/fscache.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#define FSCACHE_MIN_THREADS 4
#define FSCACHE_MAX_THREADS 32
@@ -84,7 +85,7 @@ static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
atomic_inc(&histogram[jif]);
}
-extern const struct file_operations fscache_histogram_fops;
+extern const struct seq_operations fscache_histogram_ops;
#else
#define fscache_hist(hist, start_jif) do {} while (0)
@@ -294,7 +295,7 @@ static inline void fscache_stat_d(atomic_t *stat)
#define __fscache_stat(stat) (stat)
-extern const struct file_operations fscache_stats_fops;
+int fscache_stats_show(struct seq_file *m, void *v);
#else
#define __fscache_stat(stat) (NULL)
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 1d9e4951a597..49a8c90414bc 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -26,14 +26,14 @@ int __init fscache_proc_init(void)
goto error_dir;
#ifdef CONFIG_FSCACHE_STATS
- if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
- &fscache_stats_fops))
+ if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
+ fscache_stats_show))
goto error_stats;
#endif
#ifdef CONFIG_FSCACHE_HISTOGRAM
- if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
- &fscache_histogram_fops))
+ if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL,
+ &fscache_histogram_ops))
goto error_histogram;
#endif
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index fcc8c2f2690e..00564a1dfd76 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -138,7 +138,7 @@ atomic_t fscache_n_cache_culled_objects;
/*
* display the general statistics
*/
-static int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m, void *v)
{
seq_puts(m, "FS-Cache statistics\n");
@@ -284,18 +284,3 @@ static int fscache_stats_show(struct seq_file *m, void *v)
atomic_read(&fscache_n_cache_culled_objects));
return 0;
}
-
-/*
- * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
- */
-static int fscache_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fscache_stats_show, NULL);
-}
-
-const struct file_operations fscache_stats_fops = {
- .open = fscache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 513c357c734b..a6c0f54c48c3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
return 0;
out_put_hidden_dir:
+ cancel_delayed_work_sync(&sbi->sync_work);
iput(sbi->hidden_dir);
out_put_root:
dput(sb->s_root);
diff --git a/fs/inode.c b/fs/inode.c
index 13ceb98c3bd3..3b55391072f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
diff --git a/fs/internal.h b/fs/internal.h
index e08972db0303..980d005b21b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
+extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 9bb2fe35799d..10205ececc27 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/bio.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
@@ -59,7 +60,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
>> bufshift;
int haveblocks;
blkcnt_t blocknum;
- struct buffer_head *bhs[needblocks + 1];
+ struct buffer_head **bhs;
int curbh, curpage;
if (block_size > deflateBound(1UL << zisofs_block_shift)) {
@@ -80,7 +81,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
/* Because zlib is not thread-safe, do all the I/O at the top. */
blocknum = block_start >> bufshift;
- memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
+ bhs = kcalloc(needblocks + 1, sizeof(*bhs), GFP_KERNEL);
+ if (!bhs) {
+ *errp = -ENOMEM;
+ return 0;
+ }
haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs);
@@ -190,6 +195,7 @@ z_eio:
b_eio:
for (i = 0; i < haveblocks; i++)
brelse(bhs[i]);
+ kfree(bhs);
return stream.total_out;
}
@@ -305,7 +311,7 @@ static int zisofs_readpage(struct file *file, struct page *page)
unsigned int zisofs_pages_per_cblock =
PAGE_SHIFT <= zisofs_block_shift ?
(1 << (zisofs_block_shift - PAGE_SHIFT)) : 0;
- struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
+ struct page **pages;
pgoff_t index = page->index, end_index;
end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -330,6 +336,12 @@ static int zisofs_readpage(struct file *file, struct page *page)
full_page = 0;
pcount = 1;
}
+ pages = kcalloc(max_t(unsigned int, zisofs_pages_per_cblock, 1),
+ sizeof(*pages), GFP_KERNEL);
+ if (!pages) {
+ unlock_page(page);
+ return -ENOMEM;
+ }
pages[full_page] = page;
for (i = 0; i < pcount; i++, index++) {
@@ -357,6 +369,7 @@ static int zisofs_readpage(struct file *file, struct page *page)
}
/* At this point, err contains 0 or -EIO depending on the "critical" page */
+ kfree(pages);
return err;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bc258a4402f6..ec3fba7d492f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -394,7 +394,10 @@ static int parse_options(char *options, struct iso9660_options *popt)
break;
#ifdef CONFIG_JOLIET
case Opt_iocharset:
+ kfree(popt->iocharset);
popt->iocharset = match_strdup(&args[0]);
+ if (!popt->iocharset)
+ return 0;
break;
#endif
case Opt_map_a:
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index ac311037d7a5..8aa453784402 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -532,6 +532,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
*/
ret = start_this_handle(journal, handle, GFP_NOFS);
if (ret < 0) {
+ handle->h_journal = journal;
jbd2_journal_free_reserved(handle);
return ret;
}
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 0a754f38462e..e5a6deb38e1e 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
__func__, inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f60dee7faf03..87bdf0f4cba1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb)
static void jffs2_kill_sb(struct super_block *sb)
{
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
- if (!sb_rdonly(sb))
+ if (c && !sb_rdonly(sb))
jffs2_stop_garbage_collect_thread(c);
kill_mtd_super(sb);
kfree(c);
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index a70907606025..35a5b2a81ae0 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -29,7 +29,6 @@
#ifdef PROC_FS_JFS /* see jfs_debug.h */
-static struct proc_dir_entry *base;
#ifdef CONFIG_JFS_DEBUG
static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
{
@@ -66,43 +65,29 @@ static const struct file_operations jfs_loglevel_proc_fops = {
};
#endif
-static struct {
- const char *name;
- const struct file_operations *proc_fops;
-} Entries[] = {
-#ifdef CONFIG_JFS_STATISTICS
- { "lmstats", &jfs_lmstats_proc_fops, },
- { "txstats", &jfs_txstats_proc_fops, },
- { "xtstat", &jfs_xtstat_proc_fops, },
- { "mpstat", &jfs_mpstat_proc_fops, },
-#endif
-#ifdef CONFIG_JFS_DEBUG
- { "TxAnchor", &jfs_txanchor_proc_fops, },
- { "loglevel", &jfs_loglevel_proc_fops }
-#endif
-};
-#define NPROCENT ARRAY_SIZE(Entries)
-
void jfs_proc_init(void)
{
- int i;
+ struct proc_dir_entry *base;
- if (!(base = proc_mkdir("fs/jfs", NULL)))
+ base = proc_mkdir("fs/jfs", NULL);
+ if (!base)
return;
- for (i = 0; i < NPROCENT; i++)
- proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
+#ifdef CONFIG_JFS_STATISTICS
+ proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show);
+ proc_create_single("txstats", 0, base, jfs_txstats_proc_show);
+ proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show);
+ proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show);
+#endif
+#ifdef CONFIG_JFS_DEBUG
+ proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
+ proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops);
+#endif
}
void jfs_proc_clean(void)
{
- int i;
-
- if (base) {
- for (i = 0; i < NPROCENT; i++)
- remove_proc_entry(Entries[i].name, base);
- remove_proc_entry("fs/jfs", NULL);
- }
+ remove_proc_subtree("fs/jfs", NULL);
}
#endif /* PROC_FS_JFS */
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index eafd1300a00b..0d9e35da8462 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,7 @@ extern void jfs_proc_clean(void);
extern int jfsloglevel;
-extern const struct file_operations jfs_txanchor_proc_fops;
+int jfs_txanchor_proc_show(struct seq_file *m, void *v);
/* information message: e.g., configuration, major event */
#define jfs_info(fmt, arg...) do { \
@@ -105,10 +105,10 @@ extern const struct file_operations jfs_txanchor_proc_fops;
* ----------
*/
#ifdef CONFIG_JFS_STATISTICS
-extern const struct file_operations jfs_lmstats_proc_fops;
-extern const struct file_operations jfs_txstats_proc_fops;
-extern const struct file_operations jfs_mpstat_proc_fops;
-extern const struct file_operations jfs_xtstat_proc_fops;
+int jfs_lmstats_proc_show(struct seq_file *m, void *v);
+int jfs_txstats_proc_show(struct seq_file *m, void *v);
+int jfs_mpstat_proc_show(struct seq_file *m, void *v);
+int jfs_xtstat_proc_show(struct seq_file *m, void *v);
#define INCREMENT(x) ((x)++)
#define DECREMENT(x) ((x)--)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0e5d412c0b01..6b68df395892 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2493,7 +2493,7 @@ exit:
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
+int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Logmgr stats\n"
@@ -2510,16 +2510,4 @@ static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
lmStat.partial_page);
return 0;
}
-
-static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_lmstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_lmstats_proc_fops = {
- .open = jfs_lmstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_JFS_STATISTICS */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 1a3b0cc22ad3..fa2c6824c7f2 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -815,7 +815,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
+int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Metapage statistics\n"
@@ -828,16 +828,4 @@ static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
mpStat.lockwait);
return 0;
}
-
-static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_mpstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_mpstat_proc_fops = {
- .open = jfs_mpstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 4d973524c887..a5663cb621d8 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2998,7 +2998,7 @@ int jfs_sync(void *arg)
}
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
-static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
+int jfs_txanchor_proc_show(struct seq_file *m, void *v)
{
char *freewait;
char *freelockwait;
@@ -3032,22 +3032,10 @@ static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
return 0;
}
-
-static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txanchor_proc_show, NULL);
-}
-
-const struct file_operations jfs_txanchor_proc_fops = {
- .open = jfs_txanchor_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
-static int jfs_txstats_proc_show(struct seq_file *m, void *v)
+int jfs_txstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS TxStats\n"
@@ -3072,16 +3060,4 @@ static int jfs_txstats_proc_show(struct seq_file *m, void *v)
TxStat.txLockAlloc_freelock);
return 0;
}
-
-static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_txstats_proc_fops = {
- .open = jfs_txstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 5cde6d2fcfca..2c200b5256a6 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3874,7 +3874,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
+int jfs_xtstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Xtree statistics\n"
@@ -3887,16 +3887,4 @@ static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
xtStat.split);
return 0;
}
-
-static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_xtstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_xtstat_proc_fops = {
- .open = jfs_xtstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b41596d71858..56c3fcbfe80e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out1:
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 26dd9a50f383..ff2716f9322e 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
+ INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
&init_user_ns, info);
diff --git a/fs/locks.c b/fs/locks.c
index 62bbe8b31f26..05e211be8684 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2788,22 +2788,10 @@ static const struct seq_operations locks_seq_operations = {
.show = locks_show,
};
-static int locks_open(struct inode *inode, struct file *filp)
-{
- return seq_open_private(filp, &locks_seq_operations,
- sizeof(struct locks_iterator));
-}
-
-static const struct file_operations proc_locks_operations = {
- .open = locks_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int __init proc_locks_init(void)
{
- proc_create("locks", 0, NULL, &proc_locks_operations);
+ proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
+ sizeof(struct locks_iterator), NULL);
return 0;
}
fs_initcall(proc_locks_init);
diff --git a/fs/namei.c b/fs/namei.c
index 6f0dc40f88c5..a59968de1636 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3365,7 +3365,9 @@ finish_open_created:
goto out;
*opened |= FILE_OPENED;
opened:
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = open_check_o_direct(file);
+ if (!error)
+ error = ima_file_check(file, op->acc_mode, *opened);
if (!error && will_truncate)
error = handle_truncate(file);
out:
@@ -3445,6 +3447,9 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = finish_open(file, child, NULL, opened);
if (error)
goto out2;
+ error = open_check_o_direct(file);
+ if (error)
+ fput(file);
out2:
mnt_drop_write(path.mnt);
out:
@@ -3845,11 +3850,11 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (error)
goto out;
- shrink_dcache_parent(dentry);
error = dir->i_op->rmdir(dir, dentry);
if (error)
goto out;
+ shrink_dcache_parent(dentry);
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
@@ -4432,8 +4437,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir->i_nlink >= max_links)
goto out;
}
- if (is_dir && !(flags & RENAME_EXCHANGE) && target)
- shrink_dcache_parent(new_dentry);
if (!is_dir) {
error = try_break_deleg(source, delegated_inode);
if (error)
@@ -4450,8 +4453,10 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
if (!(flags & RENAME_EXCHANGE) && target) {
- if (is_dir)
+ if (is_dir) {
+ shrink_dcache_parent(new_dentry);
target->i_flags |= S_DEAD;
+ }
dont_mount(new_dentry);
detach_mounts(new_dentry);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index e398f32d7541..5f75969adff1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1089,7 +1089,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
goto out_free;
}
- mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+ mnt->mnt.mnt_flags = old->mnt.mnt_flags;
+ mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
/* Don't allow unprivileged users to change mount flags */
if (flag & CL_UNPRIVILEGED) {
mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
@@ -2814,7 +2815,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
mnt_flags |= MNT_NODIRATIME;
if (flags & MS_STRICTATIME)
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
- if (flags & SB_RDONLY)
+ if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
/* The default atime for remount is preservation */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b9129e2befea..bbc91d7ca1bd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1067,7 +1067,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
-static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_server_list_stop(struct seq_file *p, void *v);
@@ -1080,14 +1079,6 @@ static const struct seq_operations nfs_server_list_ops = {
.show = nfs_server_list_show,
};
-static const struct file_operations nfs_server_list_fops = {
- .open = nfs_server_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-static int nfs_volume_list_open(struct inode *inode, struct file *file);
static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_volume_list_stop(struct seq_file *p, void *v);
@@ -1100,23 +1091,6 @@ static const struct seq_operations nfs_volume_list_ops = {
.show = nfs_volume_list_show,
};
-static const struct file_operations nfs_volume_list_fops = {
- .open = nfs_volume_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-/*
- * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
- * we're dealing
- */
-static int nfs_server_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_server_list_ops,
- sizeof(struct seq_net_private));
-}
-
/*
* set up the iterator to start reading from the server list and return the first item
*/
@@ -1185,15 +1159,6 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
- */
-static int nfs_volume_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_volume_list_ops,
- sizeof(struct seq_net_private));
-}
-
-/*
* set up the iterator to start reading from the volume list and return the first item
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
@@ -1278,14 +1243,14 @@ int nfs_fs_proc_net_init(struct net *net)
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_create("servers", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_server_list_fops);
+ p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_server_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_create("volumes", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_volume_list_fops);
+ p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_volume_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
return 0;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 70b8bf781fce..a43dfedd69ec 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -227,7 +227,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
if (!buf)
return -ENOMEM;
- rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+ rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
error = -ENOMEM;
goto out_free_buf;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2410b093a2e6..b0555d7d8200 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
break;
case S_IFDIR:
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+ if (!host_err && unlikely(d_unhashed(dchild))) {
+ struct dentry *d;
+ d = lookup_one_len(dchild->d_name.name,
+ dchild->d_parent,
+ dchild->d_name.len);
+ if (IS_ERR(d)) {
+ host_err = PTR_ERR(d);
+ break;
+ }
+ if (unlikely(d_is_negative(d))) {
+ dput(d);
+ err = nfserr_serverfault;
+ goto out;
+ }
+ dput(resfhp->fh_dentry);
+ resfhp->fh_dentry = dget(d);
+ err = fh_update(resfhp);
+ dput(dchild);
+ dchild = d;
+ if (err)
+ goto out;
+ }
break;
case S_IFCHR:
case S_IFBLK:
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1a2894aa0194..dd52d3f82e8d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
int err = nilfs_add_link(dentry, inode);
if (!err) {
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d51e1bb781cf..d94e8031fe5f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
u32 event_mask,
const void *data, int data_type)
{
- __u32 marks_mask, marks_ignored_mask;
+ __u32 marks_mask = 0, marks_ignored_mask = 0;
const struct path *path = data;
pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
@@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
!d_can_lookup(path->dentry))
return false;
- if (inode_mark && vfsmnt_mark) {
- marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
- marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
- } else if (inode_mark) {
- /*
- * if the event is for a child and this inode doesn't care about
- * events on the child, don't send it!
- */
- if ((event_mask & FS_EVENT_ON_CHILD) &&
- !(inode_mark->mask & FS_EVENT_ON_CHILD))
- return false;
- marks_mask = inode_mark->mask;
- marks_ignored_mask = inode_mark->ignored_mask;
- } else if (vfsmnt_mark) {
- marks_mask = vfsmnt_mark->mask;
- marks_ignored_mask = vfsmnt_mark->ignored_mask;
- } else {
- BUG();
+ /*
+ * if the event is for a child and this inode doesn't care about
+ * events on the child, don't send it!
+ */
+ if (inode_mark &&
+ (!(event_mask & FS_EVENT_ON_CHILD) ||
+ (inode_mark->mask & FS_EVENT_ON_CHILD))) {
+ marks_mask |= inode_mark->mask;
+ marks_ignored_mask |= inode_mark->ignored_mask;
+ }
+
+ if (vfsmnt_mark) {
+ marks_mask |= vfsmnt_mark->mask;
+ marks_ignored_mask |= vfsmnt_mark->ignored_mask;
}
if (d_is_dir(path->dentry) &&
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 219b269c737e..613ec7e5a465 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -192,8 +192,9 @@ static int send_to_group(struct inode *to_tell,
struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *group = NULL;
- __u32 inode_test_mask = 0;
- __u32 vfsmount_test_mask = 0;
+ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+ __u32 marks_mask = 0;
+ __u32 marks_ignored_mask = 0;
if (unlikely(!inode_mark && !vfsmount_mark)) {
BUG();
@@ -213,29 +214,25 @@ static int send_to_group(struct inode *to_tell,
/* does the inode mark tell us to do something? */
if (inode_mark) {
group = inode_mark->group;
- inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
- inode_test_mask &= inode_mark->mask;
- inode_test_mask &= ~inode_mark->ignored_mask;
+ marks_mask |= inode_mark->mask;
+ marks_ignored_mask |= inode_mark->ignored_mask;
}
/* does the vfsmount_mark tell us to do something? */
if (vfsmount_mark) {
- vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
group = vfsmount_mark->group;
- vfsmount_test_mask &= vfsmount_mark->mask;
- vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
- if (inode_mark)
- vfsmount_test_mask &= ~inode_mark->ignored_mask;
+ marks_mask |= vfsmount_mark->mask;
+ marks_ignored_mask |= vfsmount_mark->ignored_mask;
}
pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
- " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
+ " vfsmount_mark=%p marks_mask=%x marks_ignored_mask=%x"
" data=%p data_is=%d cookie=%d\n",
- __func__, group, to_tell, mask, inode_mark,
- inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
+ __func__, group, to_tell, mask, inode_mark, vfsmount_mark,
+ marks_mask, marks_ignored_mask, data,
data_is, cookie);
- if (!inode_test_mask && !vfsmount_test_mask)
+ if (!(test_mask & marks_mask & ~marks_ignored_mask))
return 0;
return group->ops->handle_event(group, to_tell, inode_mark,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 91a8889abf9b..ea8c551bcd7e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
current_page, vec_len, vec_start);
len = bio_add_page(bio, page, vec_len, vec_start);
- if (len != vec_len) {
- mlog(ML_ERROR, "Adding page[%d] to bio failed, "
- "page %p, len %d, vec_len %u, vec_start %u, "
- "bi_sector %llu\n", current_page, page, len,
- vec_len, vec_start,
- (unsigned long long)bio->bi_iter.bi_sector);
- bio_put(bio);
- bio = ERR_PTR(-EIO);
- return bio;
- }
+ if (len != vec_len) break;
cs += vec_len / (PAGE_SIZE/spp);
vec_start = 0;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
- int error;
+ int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
+ struct ocfs2_lock_holder oh;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
+ had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+ &oh);
+ if (had_lock < 0) {
+ error = had_lock;
+ mlog_errno(error);
+ goto out;
+ }
+
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
if (error)
mlog_errno(error);
}
-out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
+ ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
+out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
diff --git a/fs/open.c b/fs/open.c
index c5ee7cd60424..d0e955b558ad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,6 +724,16 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
+int open_check_o_direct(struct file *f)
+{
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ if (f->f_flags & O_DIRECT) {
+ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *),
@@ -745,7 +755,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
- goto done;
+ return 0;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
@@ -798,12 +808,7 @@ static int do_dentry_open(struct file *f,
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-done:
- /* NB: we're sure to have correct a_ops only after f_op->open */
- error = -EINVAL;
- if ((f->f_flags & O_DIRECT) &&
- (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO))
- goto out_fput;
+
return 0;
cleanup_all:
@@ -818,9 +823,6 @@ cleanup_file:
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
-out_fput:
- fput(f);
- return error;
}
/**
@@ -918,14 +920,20 @@ struct file *dentry_open(const struct path *path, int flags,
BUG_ON(!path->mnt);
f = get_empty_filp();
- if (IS_ERR(f))
- return f;
-
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (error) {
- put_filp(f);
- return ERR_PTR(error);
+ if (!IS_ERR(f)) {
+ f->f_flags = flags;
+ error = vfs_open(path, f, cred);
+ if (!error) {
+ /* from now on we need fput() to dispose of f */
+ error = open_check_o_direct(f);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ } else {
+ put_filp(f);
+ f = ERR_PTR(error);
+ }
}
return f;
}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 6e3134e6d98a..1b5707c44c3f 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir,
get_khandle_from_ino(inode),
dentry);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir,
"Assigned symlink inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
"Assigned dir inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 3ae5fdba0225..10796d3fe27d 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -579,6 +579,11 @@ void orangefs_kill_sb(struct super_block *sb)
/* provided sb cleanup */
kill_anon_super(sb);
+ if (!ORANGEFS_SB(sb)) {
+ mutex_lock(&orangefs_request_mutex);
+ mutex_unlock(&orangefs_request_mutex);
+ return;
+ }
/*
* issue the unmount to userspace to tell it to remove the
* dynamic mount info it has for this superblock
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..e6d7f41b6684 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
@@ -677,25 +702,22 @@ out:
static int children_seq_show(struct seq_file *seq, void *v)
{
- struct inode *inode = seq->private;
- pid_t pid;
-
- pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
- seq_printf(seq, "%d ", pid);
+ struct inode *inode = file_inode(seq->file);
+ seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
return 0;
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
- return get_children_pid(seq->private, NULL, *pos);
+ return get_children_pid(file_inode(seq->file), NULL, *pos);
}
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pid *pid;
- pid = get_children_pid(seq->private, v, *pos + 1);
+ pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
put_pid(v);
++*pos;
@@ -716,17 +738,7 @@ static const struct seq_operations children_seq_ops = {
static int children_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &children_seq_ops);
- if (ret)
- return ret;
-
- m = file->private_data;
- m->private = inode;
-
- return ret;
+ return seq_open(file, &children_seq_ops);
}
const struct file_operations proc_tid_children_operations = {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index eafa39a3a88c..4e35593546b1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
if (rv <= 0)
goto out_free_page;
@@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
int nr_read;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -698,7 +698,7 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = inode->i_sb->s_fs_info;
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
bool has_perms;
@@ -733,13 +733,11 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns;
- struct pid *pid;
+ struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
- ns = inode->i_sb->s_fs_info;
- pid = proc_pid(inode);
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
@@ -946,7 +944,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
@@ -1410,7 +1408,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1693,6 +1691,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
kuid_t uid;
kgid_t gid;
+ if (unlikely(task->flags & PF_KTHREAD)) {
+ *ruid = GLOBAL_ROOT_UID;
+ *rgid = GLOBAL_ROOT_GID;
+ return;
+ }
+
/* Default to the tasks effective ownership */
rcu_read_lock();
cred = __task_cred(task);
@@ -1776,8 +1780,8 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
- struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -2331,7 +2335,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = inode->i_sb->s_fs_info;
+ tp->ns = proc_pid_ns(inode);
return 0;
}
@@ -3233,7 +3237,7 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file));
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3582,7 +3586,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = inode->i_sb->s_fs_info;
+ ns = proc_pid_ns(inode);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 8233e7af9389..fa762c5fbcb2 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cmdline_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cmdline_proc_show, NULL);
-}
-
-static const struct file_operations cmdline_proc_fops = {
- .open = cmdline_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_cmdline_init(void)
{
- proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
+ proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
return 0;
}
fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index a8ac48aebd59..954caf0b7fee 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = {
.show = show_console_dev
};
-static int consoles_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &consoles_op);
-}
-
-static const struct file_operations proc_consoles_operations = {
- .open = consoles_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_consoles_init(void)
{
- proc_create("consoles", 0, NULL, &proc_consoles_operations);
+ proc_create_seq("consoles", 0, NULL, &consoles_op);
return 0;
}
fs_initcall(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 2c7f22b14489..37d38697eaf8 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = {
.show = devinfo_show
};
-static int devinfo_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &devinfo_ops);
-}
-
-static const struct file_operations proc_devinfo_operations = {
- .open = devinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_devices_init(void)
{
- proc_create("devices", 0, NULL, &proc_devinfo_operations);
+ proc_create_seq("devices", 0, NULL, &devinfo_ops);
return 0;
}
fs_initcall(proc_devices_init);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2078e70e1595..02bb1914f5f7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/uaccess.h>
+#include <linux/seq_file.h>
#include "internal.h"
@@ -346,13 +347,12 @@ static const struct inode_operations proc_dir_inode_operations = {
.setattr = proc_notify_change,
};
-static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+/* returns the registered entry, or frees dp and returns NULL on failure */
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp)
{
- int ret;
-
- ret = proc_alloc_inum(&dp->low_ino);
- if (ret)
- return ret;
+ if (proc_alloc_inum(&dp->low_ino))
+ goto out_free_entry;
write_lock(&proc_subdir_lock);
dp->parent = dir;
@@ -360,12 +360,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
write_unlock(&proc_subdir_lock);
- proc_free_inum(dp->low_ino);
- return -EEXIST;
+ goto out_free_inum;
}
write_unlock(&proc_subdir_lock);
- return 0;
+ return dp;
+out_free_inum:
+ proc_free_inum(dp->low_ino);
+out_free_entry:
+ pde_free(dp);
+ return NULL;
}
static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
@@ -443,10 +447,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
if (ent->data) {
strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
- ent = NULL;
- }
+ ent = proc_register(parent, ent);
} else {
pde_free(ent);
ent = NULL;
@@ -470,11 +471,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
@@ -505,47 +504,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->proc_fops = NULL;
ent->proc_iops = NULL;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
EXPORT_SYMBOL(proc_create_mount_point);
-struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
- struct proc_dir_entry *parent,
- const struct file_operations *proc_fops,
- void *data)
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data)
{
- struct proc_dir_entry *pde;
+ struct proc_dir_entry *p;
+
if ((mode & S_IFMT) == 0)
mode |= S_IFREG;
-
- if (!S_ISREG(mode)) {
- WARN_ON(1); /* use proc_mkdir() */
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
+
+ p = __proc_create(parent, name, mode, 1);
+ if (p) {
+ p->proc_iops = &proc_file_inode_operations;
+ p->data = data;
}
+ return p;
+}
+
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops, void *data)
+{
+ struct proc_dir_entry *p;
BUG_ON(proc_fops == NULL);
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- pde = __proc_create(&parent, name, mode, 1);
- if (!pde)
- goto out;
- pde->proc_fops = proc_fops;
- pde->data = data;
- pde->proc_iops = &proc_file_inode_operations;
- if (proc_register(parent, pde) < 0)
- goto out_free;
- return pde;
-out_free:
- pde_free(pde);
-out:
- return NULL;
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = proc_fops;
+ return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -557,6 +556,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode,
}
EXPORT_SYMBOL(proc_create);
+static int proc_seq_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de->state_size)
+ return seq_open_private(file, de->seq_ops, de->state_size);
+ return seq_open(file, de->seq_ops);
+}
+
+static const struct file_operations proc_seq_fops = {
+ .open = proc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_seq_private);
+
+static int proc_single_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ return single_open(file, de->single_show, de->data);
+}
+
+static const struct file_operations proc_single_fops = {
+ .open = proc_single_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_single_data);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0f1692e63cb6..a318ae5b36b4 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,12 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ union {
+ const struct seq_operations *seq_ops;
+ int (*single_show)(struct seq_file *, void *);
+ };
void *data;
+ unsigned int state_size;
unsigned int low_ino;
nlink_t nlink;
kuid_t uid;
@@ -57,9 +62,9 @@ struct proc_dir_entry {
umode_t mode;
u8 namelen;
#ifdef CONFIG_64BIT
-#define SIZEOF_PDE_INLINE_NAME (192-139)
+#define SIZEOF_PDE_INLINE_NAME (192-155)
#else
-#define SIZEOF_PDE_INLINE_NAME (128-87)
+#define SIZEOF_PDE_INLINE_NAME (128-95)
#endif
char inline_name[SIZEOF_PDE_INLINE_NAME];
} __randomize_layout;
@@ -162,6 +167,10 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data);
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp);
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
extern int proc_readdir(struct file *, struct dir_context *);
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 6a6bee9c603c..cb0edc7cbf09 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = {
.show = show_interrupts
};
-static int interrupts_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &int_seq_ops);
-}
-
-static const struct file_operations proc_interrupts_operations = {
- .open = interrupts_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_interrupts_init(void)
{
- proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
+ proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
return 0;
}
fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
+ if (!memmap_valid_within(pfn, p, page_zone(p)))
+ return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+ * We've already checked virt_addr_valid so we know this address
+ * is a valid pointer, therefore we can check against it to determine
+ * if we need to trim
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index a000d7547479..d06694757201 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -24,25 +24,13 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
nr_running(), nr_threads,
- idr_get_cursor(&task_active_pid_ns(current)->idr));
+ idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
return 0;
}
-static int loadavg_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, loadavg_proc_show, NULL);
-}
-
-static const struct file_operations loadavg_proc_fops = {
- .open = loadavg_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_loadavg_init(void)
{
- proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
+ proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
return 0;
}
fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 65a72ab57471..2fb04846ed11 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int meminfo_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, meminfo_proc_show, NULL);
-}
-
-static const struct file_operations meminfo_proc_fops = {
- .open = meminfo_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_meminfo_init(void)
{
- proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+ proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
return 0;
}
fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 75634379f82e..3b63be64e436 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = {
.show = nommu_region_list_show
};
-static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_nommu_region_list_seqop);
-}
-
-static const struct file_operations proc_nommu_region_list_operations = {
- .open = proc_nommu_region_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_nommu_init(void)
{
- proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
+ proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop);
return 0;
}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 1763f370489d..7d94fa005b0d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
}
-int seq_open_net(struct inode *ino, struct file *f,
- const struct seq_operations *ops, int size)
+static int seq_open_net(struct inode *inode, struct file *file)
{
- struct net *net;
+ unsigned int state_size = PDE(inode)->state_size;
struct seq_net_private *p;
+ struct net *net;
- BUG_ON(size < sizeof(*p));
+ WARN_ON_ONCE(state_size < sizeof(*p));
- net = get_proc_net(ino);
- if (net == NULL)
+ net = get_proc_net(inode);
+ if (!net)
return -ENXIO;
- p = __seq_open_private(f, ops, size);
- if (p == NULL) {
+ p = __seq_open_private(file, PDE(inode)->seq_ops, state_size);
+ if (!p) {
put_net(net);
return -ENOMEM;
}
@@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f,
#endif
return 0;
}
-EXPORT_SYMBOL_GPL(seq_open_net);
-int single_open_net(struct inode *inode, struct file *file,
- int (*show)(struct seq_file *, void *))
+static int seq_release_net(struct inode *ino, struct file *f)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, show, net);
- if (err < 0)
- goto err_open;
+ struct seq_file *seq = f->private_data;
+ put_net(seq_file_net(seq));
+ seq_release_private(ino, f);
return 0;
+}
-err_open:
- put_net(net);
-err_net:
- return err;
+static const struct file_operations proc_net_seq_fops = {
+ .open = seq_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
}
-EXPORT_SYMBOL_GPL(single_open_net);
+EXPORT_SYMBOL_GPL(proc_create_net_data);
-int seq_release_net(struct inode *ino, struct file *f)
+static int single_open_net(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ struct proc_dir_entry *de = PDE(inode);
+ struct net *net;
+ int err;
- seq = f->private_data;
+ net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
- put_net(seq_file_net(seq));
- seq_release_private(ino, f);
- return 0;
+ err = single_open(file, de->single_show, net);
+ if (err)
+ put_net(net);
+ return err;
}
-EXPORT_SYMBOL_GPL(seq_release_net);
-int single_release_net(struct inode *ino, struct file *f)
+static int single_release_net(struct inode *ino, struct file *f)
{
struct seq_file *seq = f->private_data;
put_net(seq->private);
return single_release(ino, f);
}
-EXPORT_SYMBOL_GPL(single_release_net);
+
+static const struct file_operations proc_net_single_fops = {
+ .open = single_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_single);
static struct net *get_proc_task_net(struct inode *dir)
{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d0cf1c50bb6c..c69ff191e5d8 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = {
.show = show_tty_driver
};
-static int tty_drivers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tty_drivers_op);
-}
-
-static const struct file_operations proc_tty_drivers_operations = {
- .open = tty_drivers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* This function is called by tty_register_driver() to handle
* registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver)
struct proc_dir_entry *ent;
if (!driver->driver_name || driver->proc_entry ||
- !driver->ops->proc_fops)
+ !driver->ops->proc_show)
return;
- ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
- driver->ops->proc_fops, driver);
+ ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver,
+ driver->ops->proc_show, driver);
driver->proc_entry = ent;
}
@@ -186,6 +174,6 @@ void __init proc_tty_init(void)
* entry.
*/
proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
- proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
- proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
+ proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops);
+ proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op);
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 4d7d061696b3..127265e5c55f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *self;
inode_lock(root_inode);
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 24072cc06e65..12901dcf57e2 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v)
return 0;
}
-static int softirqs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_softirqs, NULL);
-}
-
-static const struct file_operations proc_softirqs_operations = {
- .open = softirqs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_softirqs_init(void)
{
- proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
+ proc_create_single("softirqs", 0, NULL, show_softirqs);
return 0;
}
fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 65ae54659833..c486ad4b43f0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1310,9 +1310,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
else if (is_swap_pmd(pmd)) {
swp_entry_t entry = pmd_to_swp_entry(pmd);
+ unsigned long offset = swp_offset(entry);
+ offset += (addr & ~PMD_MASK) >> PAGE_SHIFT;
frame = swp_type(entry) |
- (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ (offset << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
@@ -1332,6 +1334,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
break;
if (pm->show_pfn && (flags & PM_PRESENT))
frame++;
+ else if (flags & PM_SWAP)
+ frame += (1 << MAX_SWAPFILES_SHIFT);
}
spin_unlock(ptl);
return err;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9d2efaca499f..b905010ca9eb 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *thread_self;
inode_lock(root_inode);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 95a708d83721..3bd12f955867 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int uptime_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uptime_proc_show, NULL);
-}
-
-static const struct file_operations uptime_proc_fops = {
- .open = uptime_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_uptime_init(void)
{
- proc_create("uptime", 0, NULL, &uptime_proc_fops);
+ proc_create_single("uptime", 0, NULL, uptime_proc_show);
return 0;
}
fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 94901e8e700d..b449f186577f 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, version_proc_show, NULL);
-}
-
-static const struct file_operations version_proc_fops = {
- .open = version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_version_init(void)
{
- proc_create("version", 0, NULL, &version_proc_fops);
+ proc_create_single("version", 0, NULL, version_proc_show);
return 0;
}
fs_initcall(proc_version_init);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 020c597ef9b6..d88231e3b2be 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2966,7 +2966,7 @@ static int __init dquot_init(void)
NULL);
order = 0;
- dquot_hash = (struct hlist_head *)__get_free_pages(GFP_ATOMIC, order);
+ dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order);
if (!dquot_hash)
panic("Cannot create dquot hash table");
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index bd39a998843d..5089dac02660 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
reiserfs_update_inode_transaction(inode);
reiserfs_update_inode_transaction(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
/* the above add_entry did not update dir's stat data */
reiserfs_update_sd(&th, dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
@@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fe999157dd97..e39b3910d24d 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -389,27 +389,13 @@ static int show_journal(struct seq_file *m, void *unused)
return 0;
}
-static int r_open(struct inode *inode, struct file *file)
-{
- return single_open(file, PDE_DATA(inode),
- proc_get_parent_data(inode));
-}
-
-static const struct file_operations r_file_operations = {
- .open = r_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static struct proc_dir_entry *proc_info_root = NULL;
static const char proc_info_root_name[] = "fs/reiserfs";
static void add_file(struct super_block *sb, char *name,
int (*func) (struct seq_file *, void *))
{
- proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
- &r_file_operations, func);
+ proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb);
}
int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index c6c27f1f9c98..4cc090b50cc5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
if (m->count + width >= m->size)
goto overflow;
- if (num < 10) {
- m->buf[m->count++] = num + '0';
- return;
- }
-
len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
if (!len)
goto overflow;
diff --git a/fs/super.c b/fs/super.c
index c9e34cd2b759..50728d9c1a05 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * Don't call trylock_super as it is a potential
- * scalability bottleneck. The counts could get updated
- * between super_cache_count and super_cache_scan anyway.
- * Call to super_cache_count with shrinker_rwsem held
- * ensures the safety of call to list_lru_shrink_count() and
- * s_op->nr_cached_objects().
+ * We don't call trylock_super() here as it is a scalability bottleneck,
+ * so we're exposed to partial setup state. The shrinker rwsem does not
+ * protect filesystem operations backing list_lru_shrink_count() or
+ * s_op->nr_cached_objects(). Counts can change between
+ * super_cache_count and super_cache_scan, so we really don't need locks
+ * here.
+ *
+ * However, if we are currently mounting the superblock, the underlying
+ * filesystem might be in a state of partial construction and hence it
+ * is dangerous to access it. trylock_super() uses a SB_BORN check to
+ * avoid this situation, so do the same here. The memory barrier is
+ * matched with the one in mount_fs() as we don't hold locks here.
*/
+ if (!(sb->s_flags & SB_BORN))
+ return 0;
+ smp_rmb();
+
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
@@ -167,6 +177,7 @@ static void destroy_unused_super(struct super_block *s)
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
+ free_prealloced_shrinker(&s->s_shrink);
/* no delays needed */
destroy_super_work(&s->destroy_work);
}
@@ -252,6 +263,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_shrink.count_objects = super_cache_count;
s->s_shrink.batch = 1024;
s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
+ if (prealloc_shrinker(&s->s_shrink))
+ goto fail;
return s;
fail:
@@ -518,11 +531,7 @@ retry:
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
- err = register_shrinker(&s->s_shrink);
- if (err) {
- deactivate_locked_super(s);
- s = ERR_PTR(err);
- }
+ register_shrinker_prepared(&s->s_shrink);
return s;
}
@@ -1273,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
+
+ /*
+ * Write barrier is for super_cache_count(). We place it before setting
+ * SB_BORN as the data dependency between the two functions is the
+ * superblock structure contents that we just set up, not the SB_BORN
+ * flag.
+ */
+ smp_wmb();
sb->s_flags |= SB_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index b428d317ae92..92682fcc41f6 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
{
struct dentry *root;
void *ns;
- bool new_sb;
+ bool new_sb = false;
if (!(flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
@@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
SYSFS_MAGIC, &new_sb, ns);
- if (IS_ERR(root) || !new_sb)
+ if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
- else if (new_sb)
+ else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 0458dd47e105..c586026508db 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
@@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index f897e55f2cd0..16a8ad21b77e 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,9 @@
#include "udf_sb.h"
+#define SURROGATE_MASK 0xfffff800
+#define SURROGATE_PAIR 0x0000d800
+
static int udf_uni2char_utf8(wchar_t uni,
unsigned char *out,
int boundlen)
@@ -37,6 +40,9 @@ static int udf_uni2char_utf8(wchar_t uni,
if (boundlen <= 0)
return -ENAMETOOLONG;
+ if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
+ return -EINVAL;
+
if (uni < 0x80) {
out[u_len++] = (unsigned char)uni;
} else if (uni < 0x800) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 32545cd00ceb..d5f43ba76c59 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ufs_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
out_fail:
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ce4a34a2751d..35a124400d60 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -511,7 +511,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6a7c2f03ea11..040eeda8426f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -725,12 +725,16 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return -ENOSPC;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index ef68b1de006a..1201107eabc6 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -466,6 +466,8 @@ xfs_dinode_verify(
return __this_address;
if (di_size > XFS_DFORK_DSIZE(dip, mp))
return __this_address;
+ if (dip->di_nextents)
+ return __this_address;
/* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -484,12 +486,31 @@ xfs_dinode_verify(
if (XFS_DFORK_Q(dip)) {
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
+ if (dip->di_anextents)
+ return __this_address;
+ /* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
break;
default:
return __this_address;
}
+ } else {
+ /*
+ * If there is no fork offset, this may be a freshly-made inode
+ * in a new disk cluster, in which case di_aformat is zeroed.
+ * Otherwise, such an inode must be in EXTENTS format; this goes
+ * for freed inodes as well.
+ */
+ switch (dip->di_aformat) {
+ case 0:
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return __this_address;
+ }
+ if (dip->di_anextents)
+ return __this_address;
}
/* only version 3 or greater inodes are extensively verified here */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0ab824f574ed..102463543db3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -594,7 +594,7 @@ xfs_alloc_ioend(
struct xfs_ioend *ioend;
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
xfs_init_bio_from_bh(bio, bh);
ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 69346d460dfa..694c85b03813 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__
-extern struct bio_set *xfs_ioend_bioset;
+extern struct bio_set xfs_ioend_bioset;
/*
* Types of I/O for bmap clustering and I/O completion tracking.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 299aee4b7b0b..e70fb8ccecea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -778,22 +778,26 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
+ loff_t isize = i_size_read(inode);
- new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
goto out_unlock;
}
- /* check the new inode size does not wrap through zero */
- if (new_size > inode->i_sb->s_maxbytes) {
+ /*
+ * New inode size must not exceed ->s_maxbytes, accounting for
+ * possible signed overflow.
+ */
+ if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
+ new_size = isize + len;
/* Offset should be less than i_size */
- if (offset >= i_size_read(inode)) {
+ if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
@@ -876,8 +880,18 @@ xfs_file_dedupe_range(
struct file *dst_file,
u64 dst_loff)
{
+ struct inode *srci = file_inode(src_file);
+ u64 max_dedupe;
int error;
+ /*
+ * Since we have to read all these pages in to compare them, cut
+ * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
+ * That means we won't do more than MAX_RW_COUNT IO per request.
+ */
+ max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
+ if (len > max_dedupe)
+ len = max_dedupe;
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 056e12b421eb..1cc79907b377 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -113,6 +113,7 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
}
}
+#ifdef CONFIG_PROC_FS
/* legacy quota interfaces */
#ifdef CONFIG_XFS_QUOTA
static int xqm_proc_show(struct seq_file *m, void *v)
@@ -124,18 +125,6 @@ static int xqm_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
- .open = xqm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* legacy quota stats interface no 2 */
static int xqmstat_proc_show(struct seq_file *m, void *v)
{
@@ -147,22 +136,8 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
return 0;
}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xqmstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_XFS_QUOTA */
-#ifdef CONFIG_PROC_FS
int
xfs_init_procfs(void)
{
@@ -174,11 +149,9 @@ xfs_init_procfs(void)
goto out;
#ifdef CONFIG_XFS_QUOTA
- if (!proc_create("fs/xfs/xqmstat", 0, NULL,
- &xqmstat_proc_fops))
+ if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show))
goto out;
- if (!proc_create("fs/xfs/xqm", 0, NULL,
- &xqm_proc_fops))
+ if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show))
goto out;
#endif
return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d71424052917..f643d76db516 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -63,7 +63,7 @@
#include <linux/parser.h>
static const struct super_operations xfs_super_operations;
-struct bio_set *xfs_ioend_bioset;
+struct bio_set xfs_ioend_bioset;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG
@@ -1845,10 +1845,9 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init
xfs_init_zones(void)
{
- xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+ if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE,
offsetof(struct xfs_ioend, io_inline_bio),
- BIOSET_NEED_BVECS);
- if (!xfs_ioend_bioset)
+ BIOSET_NEED_BVECS))
goto out;
xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
@@ -1997,7 +1996,7 @@ xfs_init_zones(void)
out_destroy_log_ticket_zone:
kmem_zone_destroy(xfs_log_ticket_zone);
out_free_ioend_bioset:
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
out:
return -ENOMEM;
}
@@ -2029,7 +2028,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone);
kmem_zone_destroy(xfs_log_ticket_zone);
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
}
STATIC int __init